Exemple #1
0
    def post(self, entity_type, entity_id):
        """Validates an uploaded image and stores it for the given entity.

        Args:
            entity_type: str. The type of the entity that owns the image.
            entity_id: str. The ID of the entity that owns the image.

        Raises:
            InvalidInputException. The image/filename validation raised a
                ValidationError, or a file with the same name already
                exists.
        """
        image_data = self.request.get('image')
        filename = self.payload.get('filename')

        # Fall back to the handler's default prefix when the payload does
        # not carry one.
        filename_prefix = self.payload.get('filename_prefix')
        if filename_prefix is None:
            filename_prefix = self._FILENAME_PREFIX

        try:
            file_format = (
                image_validation_services.validate_image_and_filename(
                    image_data, filename))
        except utils.ValidationError as e:
            raise self.InvalidInputException(e)

        entity_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                entity_type, entity_id))

        # Refuse to overwrite an image that is already stored.
        if entity_fs.isfile('%s/%s' % (filename_prefix, filename)):
            raise self.InvalidInputException(
                'A file with the name %s already exists. Please choose a '
                'different name.' % filename)

        fs_services.save_original_and_compressed_versions_of_image(
            filename, entity_type, entity_id, image_data, filename_prefix,
            file_format in feconf.COMPRESSIBLE_IMAGE_FORMATS)

        self.render_json({'filename': filename})
Exemple #2
0
    def test_audio_upload_with_non_mp3_file(self):
        """Checks that a FLAC file posted to the audio upload handler ends
        up stored under the exploration's audio/ path.
        """
        self.login(self.EDITOR_EMAIL)
        csrf_token = self.get_new_csrf_token()

        exploration_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_EXPLORATION, '0'))
        stored_audio_path = 'audio/%s' % self.TEST_AUDIO_FILE_FLAC

        flac_path = os.path.join(
            feconf.TESTS_DATA_DIR, self.TEST_AUDIO_FILE_FLAC)
        with python_utils.open_file(
                flac_path, mode='rb', encoding=None) as audio_file:
            raw_audio = audio_file.read()

        # The file must not exist before the upload.
        self.assertFalse(exploration_fs.isfile(stored_audio_path))

        upload_url = '%s/0' % self.AUDIO_UPLOAD_URL_PREFIX
        with self.accepted_audio_extensions_swap:
            self.post_json(
                upload_url,
                {'filename': self.TEST_AUDIO_FILE_FLAC},
                csrf_token=csrf_token,
                upload_files=[
                    ('raw_audio_file', 'unused_filename', raw_audio)])

        # The upload should have created the file.
        self.assertTrue(exploration_fs.isfile(stored_audio_path))

        self.logout()
def validate_svg_filenames_in_math_rich_text(entity_type, entity_id,
                                             html_string):
    """Validates the SVG filenames for each math rich-text component and
    returns a list of all invalid math tags in the given HTML.

    A math tag is reported as invalid when its svg_filename is empty, or
    when no file with that name exists under the entity's image/ path.

    Args:
        entity_type: str. The type of the entity.
        entity_id: str. The ID of the entity.
        html_string: str. The HTML string.

    Returns:
        list(str). A list of invalid math tags in the HTML string.
    """
    soup = bs4.BeautifulSoup(
        html_string.encode(encoding='utf-8'), 'html.parser')
    error_list = []
    # The file system depends only on the entity, not on the individual tag,
    # so it is created at most once, and only if some tag actually needs a
    # file lookup.
    fs = None
    for math_tag in soup.findAll(name='oppia-noninteractive-math'):
        math_content_dict = json.loads(
            unescape_html(math_tag['math_content-with-value']))
        svg_filename = objects.UnicodeString.normalize(
            math_content_dict['svg_filename'])
        if svg_filename == '':
            error_list.append(python_utils.UNICODE(math_tag))
            continue

        if fs is None:
            file_system_class = fs_services.get_entity_file_system_class()
            fs = fs_domain.AbstractFileSystem(
                file_system_class(entity_type, entity_id))
        filepath = 'image/%s' % svg_filename
        if not fs.isfile(filepath.encode('utf-8')):
            error_list.append(python_utils.UNICODE(math_tag))
    return error_list
    def test_copy_question_images_to_the_correct_storage_path(self):
        """Checks that running the one-off job makes the question images
        available under the question's own storage path and that the job
        output reports the copy.
        """
        job_class = question_jobs_one_off.FixQuestionImagesStorageOneOffJob
        question_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_QUESTION, self.QUESTION_ID))
        image_paths = ('image/img.png', 'image/test_svg.svg')

        # Neither image should be present before the job has run.
        for image_path in image_paths:
            self.assertFalse(question_fs.isfile(image_path))

        # Run the job to completion.
        job_id = job_class.create_new()
        job_class.enqueue(job_id)
        self.process_and_flush_pending_mapreduce_tasks()

        # Both images should now be present.
        for image_path in image_paths:
            self.assertTrue(question_fs.isfile(image_path))

        # The job output should report how many paths were fixed.
        actual_output = [
            ast.literal_eval(line) for line in job_class.get_output(job_id)]
        expected_output = [[
            u'question_image_copied',
            [u'2 image paths were fixed for question id question_id '
             u'with linked_skill_ids: '
             u'[u\'skill_id_1\', u\'skill_id_2\']']]]
        self.assertEqual(expected_output, actual_output)
Exemple #5
0
    def map(item):
        """Copies images that a question references but does not store from
        its linked skills into the question's own storage.

        Args:
            item: *. The question model being mapped; it is passed to
                question_fetchers.get_question_from_model.

        Yields:
            tuple. (_DELETED_KEY, 1) for a deleted model; otherwise, if at
            least one image was copied, (_IMAGE_COPIED, message) where the
            message reports the number of copied images, the question id
            and the linked skill ids.
        """
        if item.deleted:
            yield (FixQuestionImagesStorageOneOffJob._DELETED_KEY, 1)
            return

        question = question_fetchers.get_question_from_model(item)
        image_filenames = html_cleaner.get_image_filenames_from_html_strings(
            question.question_state_data.get_all_html_content_strings())
        file_system_class = fs_services.get_entity_file_system_class()
        question_fs = fs_domain.AbstractFileSystem(file_system_class(
            feconf.ENTITY_TYPE_QUESTION, question.id))

        fixed_count = 0
        for image_filename in image_filenames:
            image_path = 'image/%s' % image_filename
            if question_fs.isfile(image_path):
                # Already stored under the question; nothing to fix.
                continue
            # Copy from the first linked skill that has the image.
            for skill_id in question.linked_skill_ids:
                skill_fs = fs_domain.AbstractFileSystem(file_system_class(
                    feconf.ENTITY_TYPE_SKILL, skill_id))
                if not skill_fs.isfile(image_path):
                    continue
                fs_services.copy_images(
                    feconf.ENTITY_TYPE_SKILL, skill_id,
                    feconf.ENTITY_TYPE_QUESTION, question.id,
                    [image_filename])
                fixed_count += 1
                break

        if fixed_count > 0:
            yield (
                FixQuestionImagesStorageOneOffJob._IMAGE_COPIED,
                '%s image paths were fixed for question id %s with '
                'linked_skill_ids: %r' % (
                    fixed_count, question.id, question.linked_skill_ids))
Exemple #6
0
    def _convert_state_v32_dict_to_v33_dict(cls, question_id,
                                            question_state_dict):
        """Converts from version 32 to 33. Version 33 adds dimensions to
        images in the oppia-noninteractive-image tags located inside tabs
        and collapsible blocks.

        Args:
            question_id: str. Question id.
            question_state_dict: dict. The state dict of the question; it is
                passed to State.convert_html_fields_in_state.

        Returns:
            dict. The converted question_state_dict.
        """
        question_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_QUESTION, question_id))
        # Bind the question's file system up front so that the conversion
        # function only needs to be given the HTML string.
        conversion_fn = functools.partial(
            html_validation_service.add_dims_to_img_in_complex_rte,
            question_fs)
        return state_domain.State.convert_html_fields_in_state(
            question_state_dict, conversion_fn)
Exemple #7
0
    def post(self, entity_type, entity_id):
        """Validates an uploaded image and its filename, then stores the
        image for the given entity.

        Args:
            entity_type: str. The type of the entity that owns the image.
            entity_id: str. The ID of the entity that owns the image.

        Raises:
            InvalidInputException. The image is missing or not a recognized
                format, the filename is missing or malformed, the extension
                does not match the detected format, or a file with the same
                name already exists.
        """
        image_data = self.request.get('image')
        filename = self.payload.get('filename')
        filename_prefix = self.payload.get('filename_prefix')
        if filename_prefix is None:
            filename_prefix = self._FILENAME_PREFIX
        if not image_data:
            raise self.InvalidInputException('No image supplied')

        accepted_formats = feconf.ACCEPTED_IMAGE_FORMATS_AND_EXTENSIONS
        allowed_formats = ', '.join(list(accepted_formats.keys()))

        # Detect the format from the image data itself.
        file_format = imghdr.what(None, h=image_data)
        if file_format not in accepted_formats:
            raise self.InvalidInputException('Image not recognized')

        # Check the shape of the filename before looking at its extension.
        if not filename:
            raise self.InvalidInputException('No filename supplied')
        dot_index = filename.rfind('.')
        if dot_index == 0:
            raise self.InvalidInputException('Invalid filename')
        if '/' in filename or '..' in filename:
            raise self.InvalidInputException(
                'Filenames should not include slashes (/) or consecutive '
                'dot characters.')
        if dot_index == -1:
            raise self.InvalidInputException(
                'Image filename with no extension: it should have '
                'one of the following extensions: %s.' % allowed_formats)

        # The extension must be one of those accepted for the detected
        # format.
        extension = filename[dot_index + 1:].lower()
        if extension not in accepted_formats[file_format]:
            raise self.InvalidInputException(
                'Expected a filename ending in .%s, received %s' %
                (file_format, filename))

        entity_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                entity_type, entity_id))

        # Refuse to overwrite an image that is already stored.
        if entity_fs.isfile('%s/%s' % (filename_prefix, filename)):
            raise self.InvalidInputException(
                'A file with the name %s already exists. Please choose a '
                'different name.' % filename)

        fs_services.save_original_and_compressed_versions_of_image(
            self.user_id, filename, entity_type, entity_id, image_data,
            filename_prefix)

        self.render_json({'filename': filename})
 def test_save_and_get_classifier_data(self):
     """Checks that classifier data written by save_classifier_data can be
     read back from the exploration's file system and parsed.
     """
     fs_services.save_classifier_data(
         'exp_id', 'job_id', self.classifier_data_proto)

     exploration_fs = fs_domain.AbstractFileSystem(
         fs_services.get_entity_file_system_class()(
             feconf.ENTITY_TYPE_EXPLORATION, 'exp_id'))
     stored_data = exploration_fs.get('job_id-classifier-data.pb.xz')

     # Decompress the stored bytes and parse them back into a proto.
     decompressed_data = utils.decompress_from_zlib(stored_data)
     parsed_proto = text_classifier_pb2.TextClassifierFrozenModel()
     parsed_proto.ParseFromString(decompressed_data)

     self.assertEqual(
         parsed_proto.model_json, self.classifier_data_proto.model_json)
Exemple #9
0
def get_filename_with_dimensions(old_filename, exp_id):
    """Builds a new filename for an exploration image that includes the
    image's dimensions.

    Args:
        old_filename: str. Name of the file whose dimensions need to be
            calculated.
        exp_id: str. Exploration id.

    Returns:
        str. The new filename of the image file.
    """
    exploration_fs = fs_domain.AbstractFileSystem(
        fs_services.get_entity_file_system_class()(
            feconf.ENTITY_TYPE_EXPLORATION, exp_id))
    image_path = 'image/%s' % old_filename
    try:
        height, width = gae_image_services.get_image_dimensions(
            exploration_fs.get(image_path.encode('utf-8')))
    except IOError:
        # Fall back to 120 x 120 when reading the image raises an IOError.
        height, width = 120, 120
    return regenerate_image_filename_using_dimensions(
        old_filename, height, width)
Exemple #10
0
    def post(self, exploration_id):
        """Saves an audio file uploaded by a content creator.

        Args:
            exploration_id: str. The ID of the exploration that the audio
                file is stored under.

        Raises:
            InvalidInputException. No audio or no filename was supplied,
                the filename extension is missing or not accepted, the
                audio could not be interpreted as the claimed type, the
                audio is longer than feconf.MAX_AUDIO_FILE_LENGTH_SEC, or
                its mime types do not match the extension.
        """
        raw_audio_file = self.request.get('raw_audio_file')
        filename = self.payload.get('filename')
        allowed_formats = list(feconf.ACCEPTED_AUDIO_EXTENSIONS.keys())

        if not raw_audio_file:
            raise self.InvalidInputException('No audio supplied')
        # Without this check a payload that has no filename would crash on
        # the rfind() call below instead of reporting an input error.
        if filename is None:
            raise self.InvalidInputException('No filename supplied')
        dot_index = filename.rfind('.')
        extension = filename[dot_index + 1:].lower()

        if dot_index == -1 or dot_index == 0:
            raise self.InvalidInputException(
                'No filename extension: it should have '
                'one of the following extensions: %s' % allowed_formats)
        if extension not in feconf.ACCEPTED_AUDIO_EXTENSIONS:
            raise self.InvalidInputException(
                'Invalid filename extension: it should have '
                'one of the following extensions: %s' % allowed_formats)

        tempbuffer = python_utils.string_io()
        try:
            tempbuffer.write(raw_audio_file)
            tempbuffer.seek(0)
            # For every accepted extension, use the mutagen-specific
            # constructor for that type. This will catch mismatched audio
            # types e.g. uploading a flac file with an MP3 extension.
            if extension == 'mp3':
                audio = mp3.MP3(tempbuffer)
            else:
                audio = mutagen.File(tempbuffer)
        except mutagen.MutagenError:
            # The calls to mp3.MP3() versus mutagen.File() seem to behave
            # differently upon not being able to interpret the audio.
            # mp3.MP3() raises a MutagenError whereas mutagen.File()
            # seems to return None. It's not clear if this is always
            # the case. Occasionally, mutagen.File() also seems to
            # raise a MutagenError.
            raise self.InvalidInputException(
                'Audio not recognized as a %s file' % extension)
        finally:
            # Close the buffer on the error path as well as on success.
            tempbuffer.close()

        if audio is None:
            raise self.InvalidInputException(
                'Audio not recognized as a %s file' % extension)
        if audio.info.length > feconf.MAX_AUDIO_FILE_LENGTH_SEC:
            raise self.InvalidInputException(
                'Audio files must be under %s seconds in length. The uploaded '
                'file is %.2f seconds long.' % (
                    feconf.MAX_AUDIO_FILE_LENGTH_SEC, audio.info.length))
        # At least one detected mime type must be accepted for the extension.
        if not set(audio.mime).intersection(
                set(feconf.ACCEPTED_AUDIO_EXTENSIONS[extension])):
            raise self.InvalidInputException(
                'Although the filename extension indicates the file '
                'is a %s file, it was not recognized as one. '
                'Found mime types: %s' % (extension, audio.mime))

        mimetype = audio.mime[0]
        # Fetch the audio file duration from the Mutagen metadata.
        duration_secs = audio.info.length

        # For a strange, unknown reason, the audio variable must be
        # deleted before opening cloud storage. If not, cloud storage
        # throws a very mysterious error that entails a mutagen
        # object being recursively passed around in app engine.
        del audio

        # Audio files are stored to the datastore in the dev env, and to GCS
        # in production.
        file_system_class = fs_services.get_entity_file_system_class()
        fs = fs_domain.AbstractFileSystem(file_system_class(
            feconf.ENTITY_TYPE_EXPLORATION, exploration_id))
        fs.commit(
            '%s/%s' % (self._FILENAME_PREFIX, filename),
            raw_audio_file, mimetype=mimetype)

        self.render_json({'filename': filename, 'duration_secs': duration_secs})
Exemple #11
0
    def _convert_states_v32_dict_to_v33_dict(cls, exp_id, draft_change_list):
        """Converts draft change list from state version 32 to 33. State
        version 33 adds image dimensions to images inside collapsible
        blocks and tabs. The HTML carried by edits to state content,
        answer group feedback, hints and the solution explanation is
        passed through add_dims_to_img_in_complex_rte.

        Args:
            exp_id: str. Exploration id.
            draft_change_list: list(ExplorationChange). The list of
                ExplorationChange domain objects to upgrade.

        Returns:
            list(ExplorationChange). The converted draft_change_list. This
            is the same list object that was passed in; its changes are
            updated in place.
        """
        exploration_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_EXPLORATION, exp_id))

        def _add_dims(html_string):
            """Runs the HTML through the image-dimension conversion using
            the exploration's file system.
            """
            return html_validation_service.add_dims_to_img_in_complex_rte(
                exploration_fs, html_string)

        for change in draft_change_list:
            # Only edits to state properties can carry HTML to convert.
            if change.cmd != exp_domain.CMD_EDIT_STATE_PROPERTY:
                continue

            property_name = change.property_name
            if property_name == exp_domain.STATE_PROPERTY_CONTENT:
                # Html in state content.
                change.new_value[u'html'] = _add_dims(
                    change.new_value['html'])

            elif (property_name ==
                  exp_domain.STATE_PROPERTY_INTERACTION_ANSWER_GROUPS):
                # Html in interaction answer groups.
                converted_answer_groups = []
                for answer_group in change.new_value:
                    outcome = answer_group['outcome']
                    outcome['feedback']['html'] = _add_dims(
                        outcome['feedback']['html'])
                    converted_answer_groups.append({
                        'rule_specs': answer_group['rule_specs'],
                        'outcome': outcome,
                        'training_data': answer_group['training_data'],
                        'tagged_skill_misconception_id': (
                            answer_group['tagged_skill_misconception_id'])
                    })
                change.new_value = converted_answer_groups

            elif (property_name ==
                  exp_domain.STATE_PROPERTY_INTERACTION_HINTS):
                # Html in hints.
                converted_hints = []
                for hint in change.new_value:
                    hint_content = hint['hint_content']
                    converted_html = _add_dims(hint_content['html'])
                    converted_hints.append({
                        'hint_content': {
                            'content_id': hint_content['content_id'],
                            'html': converted_html
                        }
                    })
                change.new_value = converted_hints

            elif (property_name ==
                  exp_domain.STATE_PROPERTY_INTERACTION_SOLUTION):
                # Html in solution.
                converted_html = _add_dims(
                    change.new_value['explanation']['html'])
                change.new_value[u'explanation'][u'html'] = converted_html

        return draft_change_list
 def test_get_exploration_file_system_with_dev_mode_disabled(self):
     """Checks that the entity file system class produces a GcsFileSystem
     when DEV_MODE is swapped to False.
     """
     with self.swap(constants, 'DEV_MODE', False):
         file_system_class = fs_services.get_entity_file_system_class()
         exploration_fs = file_system_class(
             feconf.ENTITY_TYPE_EXPLORATION, 'entity_id')
         self.assertIsInstance(exploration_fs, fs_domain.GcsFileSystem)
Exemple #13
0
    def post(self, entity_type, entity_id):
        """Validates an uploaded image (SVG or raster) and its filename,
        then stores the image for the given entity.

        Args:
            entity_type: str. The type of the entity that owns the image.
            entity_id: str. The ID of the entity that owns the image.

        Raises:
            InvalidInputException. The image is missing, too large, not a
                recognized format or an SVG with unsupported tags or
                attributes; the filename is missing or malformed; the
                extension does not match the detected format; or a file
                with the same name already exists.
        """
        image_data = self.request.get('image')
        filename = self.payload.get('filename')
        filename_prefix = self.payload.get('filename_prefix')
        if filename_prefix is None:
            filename_prefix = self._FILENAME_PREFIX
        if not image_data:
            raise self.InvalidInputException('No image supplied')
        if len(image_data) > self.HUNDRED_KB_IN_BYTES:
            raise self.InvalidInputException(
                'Image exceeds file size limit of 100 KB.')

        accepted_formats = feconf.ACCEPTED_IMAGE_FORMATS_AND_EXTENSIONS
        allowed_formats = ', '.join(list(accepted_formats.keys()))

        if html_validation_service.is_parsable_as_xml(image_data):
            # Data that parses as XML is treated as an SVG and checked for
            # unsupported tags and attributes.
            file_format = 'svg'
            invalid_tags, invalid_attrs = (
                html_validation_service.get_invalid_svg_tags_and_attrs(
                    image_data))
            if invalid_tags or invalid_attrs:
                invalid_tags_message = ''
                if invalid_tags:
                    invalid_tags_message = 'tags: %s' % invalid_tags
                invalid_attrs_message = ''
                if invalid_attrs:
                    invalid_attrs_message = 'attributes: %s' % invalid_attrs
                raise self.InvalidInputException(
                    'Unsupported tags/attributes found in the SVG:\n%s\n%s' %
                    (invalid_tags_message, invalid_attrs_message))
        else:
            # Otherwise detect the format from the image data itself.
            file_format = imghdr.what(None, h=image_data)
            if file_format not in accepted_formats:
                raise self.InvalidInputException('Image not recognized')

        # Check the shape of the filename before looking at its extension.
        if not filename:
            raise self.InvalidInputException('No filename supplied')
        dot_index = filename.rfind('.')
        if dot_index == 0:
            raise self.InvalidInputException('Invalid filename')
        if '/' in filename or '..' in filename:
            raise self.InvalidInputException(
                'Filenames should not include slashes (/) or consecutive '
                'dot characters.')
        if dot_index == -1:
            raise self.InvalidInputException(
                'Image filename with no extension: it should have '
                'one of the following extensions: %s.' % allowed_formats)

        # The extension must be one of those accepted for the detected
        # format.
        extension = filename[dot_index + 1:].lower()
        if extension not in accepted_formats[file_format]:
            raise self.InvalidInputException(
                'Expected a filename ending in .%s, received %s' %
                (file_format, filename))

        entity_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                entity_type, entity_id))

        # Refuse to overwrite an image that is already stored.
        if entity_fs.isfile('%s/%s' % (filename_prefix, filename)):
            raise self.InvalidInputException(
                'A file with the name %s already exists. Please choose a '
                'different name.' % filename)

        fs_services.save_original_and_compressed_versions_of_image(
            filename, entity_type, entity_id, image_data, filename_prefix,
            file_format in feconf.COMPRESSIBLE_IMAGE_FORMATS)

        self.render_json({'filename': filename})