Example #1
0
def handle_course_import_exception(courselike_key,
                                   exception,
                                   status,
                                   known=True):
    """
    Log a course import failure, report it to monitoring and fail the task.

    Arguments:
        courselike_key: A locator identifying the course resource being imported.
        exception: The exception raised during the import.
        status: UserTaskStatus object tracking the import task.
        known: When True the exception text becomes the task failure message;
            otherwise the generic unknown-import-error message is used.
    """
    error_text = str(exception)
    prefix = f"Course import {courselike_key}:"
    LOGGER.exception(f"{prefix} Error while importing course: {error_text}")

    # Known failures surface their own text; anything else gets the generic message.
    failure_message = error_text if known else UserErrors.UNKNOWN_ERROR_IN_IMPORT

    monitor_import_failure(courselike_key, status.state, exception=exception)

    # A task that is already marked as failed is left untouched.
    if status.state != UserTaskStatus.FAILED:
        status.fail(failure_message)
Example #2
0
    def try_load_course(self,
                        course_dir,
                        course_ids=None,
                        target_course_id=None):
        '''
        Load the course found in `course_dir`, keeping track of errors along the way.

        `course_ids` and `target_course_id` are handed to `self.load_course`;
        if course_ids is not None, the course is rejected unless its id is in
        course_ids. On success the course is stored in `self.courses` and its
        error log in `self._course_errors`; on failure the error log is stored
        in `self.errored_courses`. Nothing is returned.
        '''
        # The course has no location until it has loaded, so load-time errors
        # go into a standalone tracker that is filed under the right key
        # once the outcome is known.
        load_errors = make_error_tracker()
        try:
            loaded = self.load_course(course_dir, course_ids,
                                      load_errors.tracker,
                                      target_course_id)
        except Exception as exc:  # pylint: disable=broad-except
            msg = f'Course import {target_course_id}: ERROR: Failed to load courselike "{course_dir}": {str(exc)}'
            log.exception(msg)
            load_errors.tracker(msg)
            self.errored_courses[course_dir] = load_errors
            monitor_import_failure(target_course_id, 'Updating', exception=exc)
            return

        # Nothing was loaded and nothing was raised: there is nothing to record.
        if loaded is None:
            return

        if isinstance(loaded, ErrorBlock):
            # Didn't load course.  Instead, save the errors elsewhere.
            self.errored_courses[course_dir] = load_errors
            return

        self.courses[course_dir] = loaded
        loaded.parent = None
        loaded_course_id = self.id_from_descriptor(loaded)
        self._course_errors[loaded_course_id] = load_errors
Example #3
0
    def verify_root_name_exists(course_dir, root_name):
        """
        Locate the directory under `course_dir` that holds the root xml file.

        Returns the path of the first directory (in `os.walk` order) that
        directly contains a file named `root_name`. When no such file exists
        the task status is failed, the failure is logged and reported to
        monitoring, and None is returned.
        """
        def find_containing_dir(directory, filename):
            """
            Return the first walked directory that directly contains
            `filename`, or None when no directory does.
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                if filename in filenames:
                    return directory_path
            return None

        root_dir = find_containing_dir(course_dir, root_name)
        if root_dir:
            return root_dir

        # Root file is missing: fail the task and record why.
        message = UserErrors.FILE_MISSING.format(root_name)
        with translation_language(language):
            self.status.fail(message)
        LOGGER.error(f'{log_prefix}: {message}')
        monitor_import_failure(courselike_key,
                               current_step,
                               message=message)
        return None
Example #4
0
 def validate_user():
     """
     Look up the importing user by primary key.

     Returns the User when found. When no such user exists the task status is
     failed, the error is logged and reported to monitoring, and None is
     returned.
     """
     try:
         found_user = User.objects.get(pk=user_id)
     except User.DoesNotExist as missing_user_error:
         with translation_language(language):
             self.status.fail(UserErrors.USER_PERMISSION_DENIED)
         LOGGER.error(f'{log_prefix}: Unknown User: {user_id}')
         monitor_import_failure(courselike_key,
                                current_step,
                                exception=missing_user_error)
         return None
     return found_user
Example #5
0
 def user_has_access(user):
     """
     Check that `user` has studio write access to the given course.

     When access is denied the task status is failed and the denial is logged
     and reported to monitoring. The result of the access check is returned
     either way.
     """
     access_granted = has_course_author_access(user, courselike_key)
     if access_granted:
         return access_granted

     # Access denied: fail the task and record who was refused.
     message = f'User permission denied: {user.username}'
     with translation_language(language):
         self.status.fail(UserErrors.COURSE_PERMISSION_DENIED)
     LOGGER.error(f'{log_prefix}: {message}')
     monitor_import_failure(courselike_key, current_step, message=message)
     return access_granted
Example #6
0
    def file_exists_in_storage():
        """
        Check that the uploaded archive path exists in the import/export storage.

        When it does not, the task status is failed and the problem is logged
        and reported to monitoring. The result of the existence check is
        returned either way.
        """
        found_in_storage = course_import_export_storage.exists(archive_path)
        if found_in_storage:
            return found_in_storage

        # Archive is missing: fail the task and record which path was expected.
        message = f'Uploaded file {archive_path} not found'
        with translation_language(language):
            self.status.fail(UserErrors.FILE_NOT_FOUND)
        LOGGER.error(f'{log_prefix}: {message}')
        monitor_import_failure(courselike_key, current_step, message=message)
        return found_in_storage
Example #7
0
    def file_is_supported():
        """
        Check that the archive name ends with the supported `.tar.gz` suffix.

        When it does not, the task status is failed and the problem is logged
        and reported to monitoring. Returns True for a supported file name,
        False otherwise.
        """
        supported = archive_name.endswith('.tar.gz')
        if supported:
            return supported

        # Unsupported archive type: fail the task and record the file name.
        message = f'Unsupported file {archive_name}'
        with translation_language(language):
            self.status.fail(UserErrors.INVALID_FILE_TYPE)
        LOGGER.error(f'{log_prefix}: {message}')
        monitor_import_failure(courselike_key, current_step, message=message)
        return supported
Example #8
0
    def file_exists_in_storage():
        """
        Check that the uploaded archive path exists in the import/export storage.

        When it does not, the task status is failed with a translated
        'Tar file not found' message and the problem is logged and reported to
        monitoring. The result of the existence check is returned either way.
        """
        found_in_storage = course_import_export_storage.exists(archive_path)
        if found_in_storage:
            return found_in_storage

        # Archive is missing: fail the task and record which path was expected.
        message = f'Uploaded file {archive_path} not found'
        with translation_language(language):
            self.status.fail(_('Tar file not found'))
        LOGGER.error(f'{log_prefix}: {message}')
        monitor_import_failure(courselike_key, current_step, message=message)
        return found_in_storage
Example #9
0
def validate_course_olx(courselike_key, course_dir, status):
    """
    Validate the course olx and record any errors as an artifact.

    Validation is skipped (and True returned) for libraries, when olx
    validation is not enabled, and when the validator itself raises. When
    errors are found they are logged and written to an artifact; the task is
    then failed and False returned unless the bypass is enabled.

    Arguments:
        courselike_key: A locator identifying the course resource.
        course_dir: complete path to the course olx
        status: UserTaskStatus object.

    Returns:
        True when the import may proceed, False when the task was failed.
    """
    log_prefix = f'Course import {courselike_key}'
    validation_failed_mesg = 'CourseOlx validation failed.'

    # Libraries are never validated.
    if isinstance(courselike_key, LibraryLocator):
        return True

    if not course_import_olx_validation_is_enabled():
        return True

    try:
        _, errorstore, _ = olxcleaner.validate(
            filename=course_dir,
            steps=settings.COURSE_OLX_VALIDATION_STAGE,
            ignore=settings.COURSE_OLX_VALIDATION_IGNORE_LIST,
            allowed_xblocks=ALL_ALLOWED_XBLOCKS)
    except Exception:  # pylint: disable=broad-except
        # A broken validator must not block the import.
        LOGGER.exception(f'{log_prefix}: CourseOlx could not be validated')
        return True

    if not errorstore.return_error(ErrorLevel.ERROR.value):
        return True

    LOGGER.error(f'{log_prefix}: {validation_failed_mesg}')
    log_errors_to_artifact(errorstore, status)

    # Errors were recorded above; with the bypass enabled the import continues.
    if bypass_olx_failure_enabled():
        return True

    monitor_import_failure(courselike_key,
                           status.state,
                           message=validation_failed_mesg)
    status.fail(UserErrors.OLX_VALIDATION_FAILED)
    return False
Example #10
0
def _write_chunk(request, courselike_key):
    """
    Write the OLX file data chunk from the given request to the local filesystem.

    The chunk is read from ``request.FILES['course-data']`` and written (first
    chunk) or appended (later chunks) to a temporary file inside a per-key
    directory under ``settings.GITHUB_REPO_ROOT``. Once the final chunk has
    been written, the assembled file is saved to
    ``course_import_export_storage`` and the ``import_olx`` task is queued.

    Arguments:
        request: the upload request; its FILES, META, user and LANGUAGE_CODE
            are read here.
        courselike_key: locator of the course-like resource being imported.

    Returns:
        A JsonResponse. While more chunks are expected it carries a ``files``
        list describing the partial upload; once the upload is complete (or a
        repeated final chunk is detected) it is ``{'ImportStatus': 1}``; on
        failure it carries ``ErrMsg`` and ``Stage`` with a 4xx status code.
    """
    # Upload .tar.gz to local filesystem for one-server installations not using S3 or Swift
    data_root = path(settings.GITHUB_REPO_ROOT)
    # The working directory name is the url-safe base64 encoding of the key's repr.
    subdir = base64.urlsafe_b64encode(repr(courselike_key).encode('utf-8')).decode('utf-8')
    course_dir = data_root / subdir
    filename = request.FILES['course-data'].name
    set_custom_attributes_for_course_key(courselike_key)
    # Step name reported to monitor_import_failure for every failure in this function.
    current_step = 'Uploading'

    def error_response(message, status, stage):
        """Returns Json error response"""
        return JsonResponse({'ErrMsg': message, 'Stage': stage}, status=status)

    # Identifier passed to _save_request_status for this key + file combination.
    courselike_string = str(courselike_key) + filename
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    try:
        # Use sessions to keep info about import progress
        _save_request_status(request, courselike_string, 0)

        if not filename.endswith('.tar.gz'):
            error_message = _('We only support uploading a .tar.gz file.')
            _save_request_status(request, courselike_string, -1)
            monitor_import_failure(courselike_key, current_step, message=error_message)
            return error_response(error_message, 415, 0)

        temp_filepath = course_dir / filename
        if not course_dir.isdir():
            os.mkdir(course_dir)

        logging.info(f'Course import {courselike_key}: importing course to {temp_filepath}')

        # Get upload chunks byte ranges
        try:
            # NOTE(review): if the header is present but CONTENT_RE does not match,
            # `matches` is presumably None and `.groupdict()` raises AttributeError,
            # which ends up in the broad `except` below - confirm this is intended.
            matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
            content_range = matches.groupdict()
        except KeyError:  # Single chunk
            # no Content-Range header, so make one that will work
            # (start == 0 selects write mode; stop == end - 1 marks it as the last chunk)
            logging.info(f'Course import {courselike_key}: single chunk found')
            content_range = {'start': 0, 'stop': 1, 'end': 2}

        # stream out the uploaded files in chunks to disk
        is_initial_import_request = int(content_range['start']) == 0
        if is_initial_import_request:
            # First chunk: create or truncate the temporary file.
            mode = "wb+"
            set_custom_attribute('course_import_init', True)
        else:
            mode = "ab+"
            # Appending to the file would fail if the file doesn't exist.
            if not temp_filepath.exists():
                error_message = _('Some chunks missed during file upload. Please try again')
                _save_request_status(request, courselike_string, -1)
                log.error(f'Course Import {courselike_key}: {error_message}')
                monitor_import_failure(courselike_key, current_step, message=error_message)
                return error_response(error_message, 409, 0)

            size = os.path.getsize(temp_filepath)
            # Check to make sure we haven't missed a chunk
            # This shouldn't happen, even if different instances are handling
            # the same session, but it's always better to catch errors earlier.
            if size < int(content_range['start']):
                error_message = _('File upload corrupted. Please try again')
                _save_request_status(request, courselike_string, -1)
                log.error(f'Course import {courselike_key}: A chunk has been missed')
                monitor_import_failure(courselike_key, current_step, message=error_message)
                return error_response(error_message, 409, 0)

            # The last request sometimes comes twice. This happens because
            # nginx sends a 499 error code when the response takes too long.
            # The file on disk is already complete, so report success without rewriting.
            elif size > int(content_range['stop']) and size == int(content_range['end']):
                return JsonResponse({'ImportStatus': 1})

        with open(temp_filepath, mode) as temp_file:
            for chunk in request.FILES['course-data'].chunks():
                temp_file.write(chunk)

        # Size of the temporary file after this chunk; reported back to the client below.
        size = os.path.getsize(temp_filepath)

        if int(content_range['stop']) != int(content_range['end']) - 1:
            # More chunks coming
            return JsonResponse({
                "files": [{
                    "name": filename,
                    "size": size,
                    "deleteUrl": "",
                    "deleteType": "",
                    "url": reverse_course_url('import_handler', courselike_key),
                    "thumbnailUrl": ""
                }]
            })

        log.info(f'Course import {courselike_key}: Upload complete')
        # Final chunk received: hand the assembled archive to storage and queue the import task.
        with open(temp_filepath, 'rb') as local_file:
            django_file = File(local_file)
            storage_path = course_import_export_storage.save('olx_import/' + filename, django_file)
        import_olx.delay(
            request.user.id, str(courselike_key), storage_path, filename, request.LANGUAGE_CODE)

    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        _save_request_status(request, courselike_string, -1)
        # Remove the whole working directory, including any partially uploaded file.
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            log.info("Course import %s: Temp data cleared", courselike_key)

        monitor_import_failure(courselike_key, current_step, exception=exception)
        log.exception(f'Course import {courselike_key}: error importing course.')
        return error_response(str(exception), 400, -1)

    return JsonResponse({'ImportStatus': 1})
Example #11
0
def import_olx(self, user_id, course_key_string, archive_path, archive_name,
               language):
    """
    Import a course or library from a provided OLX .tar.gz archive.

    The import moves through three states set on ``self.status``:
    'Unpacking' (copy the archive out of storage and extract it),
    'Verifying' (find the root xml file and validate the OLX) and
    'Updating' (run the actual import). Every failure is reported through
    ``self.status.fail`` and ``monitor_import_failure``; the function itself
    always returns None.

    Arguments:
        self: the task instance; its ``status`` attribute tracks progress.
        user_id: primary key of the user performing the import.
        course_key_string: string form of the course or library key.
        archive_path: path of the uploaded archive in ``course_import_export_storage``.
        archive_name: file name of the uploaded archive.
        language: language used for translated failure messages.
    """
    current_step = 'Unpacking'
    courselike_key = CourseKey.from_string(course_key_string)
    set_code_owner_attribute_from_module(__name__)
    set_custom_attributes_for_course_key(courselike_key)
    log_prefix = f'Course import {courselike_key}'
    self.status.set_state(current_step)

    data_root = path(settings.GITHUB_REPO_ROOT)
    # The working directory name is the url-safe base64 encoding of the key's repr.
    subdir = base64.urlsafe_b64encode(
        repr(courselike_key).encode('utf-8')).decode('utf-8')
    course_dir = data_root / subdir

    # The helpers below are closures: they read `self`, `language`, `log_prefix`,
    # `courselike_key` and the current value of `current_step` from this scope.

    def validate_user():
        """Validate if the user exists otherwise log error. """
        try:
            return User.objects.get(pk=user_id)
        except User.DoesNotExist as exc:
            with translation_language(language):
                self.status.fail(UserErrors.USER_PERMISSION_DENIED)
            LOGGER.error(f'{log_prefix}: Unknown User: {user_id}')
            monitor_import_failure(courselike_key, current_step, exception=exc)
            return

    def user_has_access(user):
        """Return True if user has studio write access to the given course."""
        has_access = has_course_author_access(user, courselike_key)
        if not has_access:
            message = f'User permission denied: {user.username}'
            with translation_language(language):
                self.status.fail(UserErrors.COURSE_PERMISSION_DENIED)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
        return has_access

    def file_is_supported():
        """Check if it is a supported file."""
        file_is_valid = archive_name.endswith('.tar.gz')

        if not file_is_valid:
            message = f'Unsupported file {archive_name}'
            with translation_language(language):
                self.status.fail(UserErrors.INVALID_FILE_TYPE)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
        return file_is_valid

    def file_exists_in_storage():
        """Verify archive path exists in storage."""
        archive_path_exists = course_import_export_storage.exists(archive_path)

        if not archive_path_exists:
            message = f'Uploaded file {archive_path} not found'
            with translation_language(language):
                self.status.fail(UserErrors.FILE_NOT_FOUND)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
        return archive_path_exists

    def verify_root_name_exists(course_dir, root_name):
        """
        Verify root xml file exists.

        Returns the directory containing `root_name`, or None (after failing
        the task status) when no such file is found under `course_dir`.
        """
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            message = UserErrors.FILE_MISSING.format(root_name)
            with translation_language(language):
                self.status.fail(message)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
            return
        return dirpath

    # Pre-flight checks. Each helper has already failed the task status when it
    # reports a problem, so a bare return is all that is left to do here.
    user = validate_user()
    if not user:
        return

    if not user_has_access(user):
        return

    if not file_is_supported():
        return

    # Pick the root file name, the existing module and the import function
    # according to whether the key refers to a library or a course.
    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    try:
        LOGGER.info(f'{log_prefix}: unpacking step started')

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():
            os.mkdir(course_dir)

        LOGGER.info(f'{log_prefix}: importing course to {temp_filepath}')

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        if not file_exists_in_storage():
            return

        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:

                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)

                # iter() with a sentinel keeps calling read_chunk until it returns b''.
                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)

        LOGGER.info(f'{log_prefix}: Download from storage complete')
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the existing course (its state before the import) has an entrance
        # exam enabled, remove the entrance exam milestone content reference.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                # The helper takes a request, so build a minimal one carrying the user.
                fake_request = RequestFactory().get('/')
                fake_request.user = user
                from .views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(
                    fake_request, courselike_key)
                LOGGER.info(
                    f'{log_prefix}: entrance exam milestone content reference has been removed'
                )
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(f'{log_prefix}: Temp data cleared')

        # NOTE(review): unlike the other failure paths, this fail() call is not
        # wrapped in translation_language(language) - confirm whether that is intended.
        self.status.fail(UserErrors.UNKNOWN_ERROR_IN_UNPACKING)
        LOGGER.exception(f'{log_prefix}: Unknown error while unpacking',
                         exc_info=True)
        monitor_import_failure(courselike_key,
                               current_step,
                               exception=exception)
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + '/'))
        except SuspiciousOperation as exc:
            with translation_language(language):
                self.status.fail(UserErrors.UNSAFE_TAR_FILE)
            LOGGER.error(f'{log_prefix}: Unsafe tar file')
            monitor_import_failure(courselike_key, current_step, exception=exc)
            return
        finally:
            tar_file.close()

        current_step = 'Verifying'
        self.status.set_state(current_step)
        self.status.increment_completed_steps()
        LOGGER.info(
            f'{log_prefix}: Uploaded file extracted. Verification step started'
        )

        dirpath = verify_root_name_exists(course_dir, root_name)
        if not dirpath:
            return

        if not validate_course_olx(courselike_key, dirpath, self.status):
            return

        # The import function is given the data root plus a path relative to it.
        dirpath = os.path.relpath(dirpath, data_root)

        current_step = 'Updating'
        self.status.set_state(current_step)
        self.status.increment_completed_steps()
        LOGGER.info(
            f'{log_prefix}: Extracted file verified. Updating course started')

        courselike_items = import_func(
            modulestore(),
            user.id,
            settings.GITHUB_REPO_ROOT,
            [dirpath],
            load_error_modules=False,
            static_content_store=contentstore(),
            target_id=courselike_key,
            verbose=True,
        )

        new_location = courselike_items[0].location
        LOGGER.debug('new course at %s', new_location)

        LOGGER.info(f'{log_prefix}: Course import successful')
        set_custom_attribute('course_import_completed', True)
    except (CourseImportException, InvalidProctoringProvider,
            DuplicateCourseError) as known_exe:
        # Known failure types: their own message is used as the task failure message.
        handle_course_import_exception(courselike_key, known_exe, self.status)
    except Exception as exception:  # pylint: disable=broad-except
        handle_course_import_exception(courselike_key,
                                       exception,
                                       self.status,
                                       known=False)
    finally:
        # Runs on every exit path of the try above, including the early returns.
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(f'{log_prefix}: Temp data cleared')

        # NOTE(review): presumably status.fail() moves the state away from
        # 'Updating', so this only runs after a successful course import - confirm.
        if self.status.state == 'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                # NOTE(review): [0] assumes get_items returns at least one
                # entrance exam chapter here; an empty result would raise IndexError.
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={'category': 'chapter'},
                    settings={'is_entrance_exam': True})[0]

                metadata = {
                    'entrance_exam_id': str(entrance_exam_chapter.location)
                }
                CourseMetadata.update_from_dict(metadata, course, user)
                from .views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(
                    f'Course import {course.id}: Entrance exam imported')