Ejemplo n.º 1
0
    def get(self, exploration_id, encoded_filepath):
        """Returns an image.

        Args:
            exploration_id: str. The id of the exploration.
            encoded_filepath: str. The image filepath. This string is encoded
                in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException. The image could not be retrieved or
                written to the response.
        """
        try:
            filepath = urllib.unquote(encoded_filepath)
            file_format = filepath[(filepath.rfind('.') + 1):]
            # If the following is not cast to str, an error occurs in the wsgi
            # library because unicode gets used.
            self.response.headers['Content-Type'] = str(
                'image/%s' % file_format)

            fs = fs_domain.AbstractFileSystem(
                fs_domain.ExplorationFileSystem(
                    'exploration/%s' % exploration_id))
            raw = fs.get(filepath)

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.write(raw)
        except Exception:
            # Exception is caught instead of using a bare except so that
            # SystemExit, KeyboardInterrupt and GeneratorExit are not
            # converted into a 404 response.
            raise self.PageNotFoundException
Ejemplo n.º 2
0
    def get(self, exploration_id, filename):
        """Returns an audio file.

        Args:
            exploration_id: str. The id of the exploration.
            filename: str. The name of the audio file. It is looked up under
                the handler's audio path prefix.

        Raises:
            PageNotFoundException. The handler is used outside of dev mode,
                or the audio file could not be retrieved.
        """
        if not constants.DEV_MODE:
            raise self.PageNotFoundException

        file_format = filename[(filename.rfind('.') + 1):]
        # If the following is not cast to str, an error occurs in the wsgi
        # library because unicode gets used.
        self.response.headers['Content-Type'] = str('audio/%s' % file_format)

        fs = fs_domain.AbstractFileSystem(
            fs_domain.ExplorationFileSystem('exploration/%s' % exploration_id))

        try:
            raw = fs.get('%s/%s' % (self._AUDIO_PATH_PREFIX, filename))
        except Exception:
            # Exception is caught instead of using a bare except so that
            # SystemExit, KeyboardInterrupt and GeneratorExit are not
            # converted into a 404 response.
            raise self.PageNotFoundException

        self.response.cache_control.no_cache = None
        self.response.cache_control.public = True
        self.response.cache_control.max_age = 600
        self.response.write(raw)
Ejemplo n.º 3
0
    def test_save_original_and_compressed_versions_of_svg_image(self):
        """Checks that saving an SVG stores the original, compressed and micro
        files, each holding the unmodified original content.
        """
        svg_path = os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg')
        with python_utils.open_file(svg_path, 'rb', encoding=None) as svg_file:
            image_content = svg_file.read()

        image_filepaths = [
            'image/%s' % image_filename for image_filename in (
                self.FILENAME,
                self.COMPRESSED_IMAGE_FILENAME,
                self.MICRO_IMAGE_FILENAME)]

        with self.swap(constants, 'DEV_MODE', False):
            file_system = fs_domain.AbstractFileSystem(
                fs_domain.GcsFileSystem(
                    feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID))

            # None of the three versions may exist before the save.
            for image_filepath in image_filepaths:
                self.assertFalse(file_system.isfile(image_filepath))

            fs_services.save_original_and_compressed_versions_of_image(
                self.FILENAME, 'exploration', self.EXPLORATION_ID,
                image_content, 'image', False)

            for image_filepath in image_filepaths:
                self.assertTrue(file_system.isfile(image_filepath))

            # The image is saved with the compressible flag set to False, so
            # every stored version is expected to equal the original content.
            stored_contents = [
                file_system.get(image_filepath)
                for image_filepath in image_filepaths]
            for stored_content in stored_contents:
                self.assertEqual(stored_content, image_content)
Ejemplo n.º 4
0
 def test_copy_images(self):
     """Checks that copy_images makes the original, compressed and micro
     versions of an exploration image available in a question's file system.
     """
     image_path = os.path.join(feconf.TESTS_DATA_DIR, 'img.png')
     with python_utils.open_file(image_path, 'rb', encoding=None) as f:
         original_image_content = f.read()
     fs_services.save_original_and_compressed_versions_of_image(
         self.FILENAME, 'exploration', self.EXPLORATION_ID,
         original_image_content, 'image', True)

     question_fs = fs_domain.AbstractFileSystem(
         fs_domain.GcsFileSystem(
             feconf.ENTITY_TYPE_QUESTION, 'question_id1'))
     copied_filepaths = [
         'image/%s' % image_filename for image_filename in (
             self.FILENAME,
             self.COMPRESSED_IMAGE_FILENAME,
             self.MICRO_IMAGE_FILENAME)]

     # The destination must be empty before the copy takes place.
     for copied_filepath in copied_filepaths:
         self.assertFalse(question_fs.isfile(copied_filepath))

     fs_services.copy_images(
         feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID,
         feconf.ENTITY_TYPE_QUESTION, 'question_id1', ['image.png'])

     for copied_filepath in copied_filepaths:
         self.assertTrue(question_fs.isfile(copied_filepath))
Ejemplo n.º 5
0
    def get(self, exploration_id, asset_type, encoded_filename):
        """Returns an asset file.

        Args:
            exploration_id: str. The id of the exploration.
            asset_type: str. Type of the asset, either image or audio.
            encoded_filename: str. The asset filename. This
              string is encoded in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException. The handler is used outside of dev mode,
                or the asset could not be retrieved or written to the
                response.
            Exception. The asset type is not one of the supported types.
        """
        if not constants.DEV_MODE:
            raise self.PageNotFoundException
        if asset_type not in self._SUPPORTED_TYPES:
            raise Exception('%s is not a supported asset type.' % asset_type)
        try:
            filename = urllib.unquote(encoded_filename)
            file_format = filename[(filename.rfind('.') + 1):]

            # If the following is not cast to str, an error occurs in the wsgi
            # library because unicode gets used.
            self.response.headers['Content-Type'] = str(
                '%s/%s' % (asset_type, file_format))

            fs = fs_domain.AbstractFileSystem(
                fs_domain.ExplorationFileSystem('exploration/%s' %
                                                exploration_id))
            raw = fs.get('%s/%s' % (asset_type, filename))

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.write(raw)
        except Exception:
            # Exception is caught instead of using a bare except so that
            # SystemExit, KeyboardInterrupt and GeneratorExit are not
            # converted into a 404 response.
            raise self.PageNotFoundException
Ejemplo n.º 6
0
 def map(exp_model):
     """Reports on the image and audio urls listed for an exploration.

     Nothing is yielded when constants.DEV_MODE is set.

     Args:
         exp_model: The model whose id is used to build the GcsFileSystem.

     Yields:
         tuple. One of:
             (INVALID_GCS_URL, url) for a listed url that does not match the
                 corresponding regex.
             (FILE_FOUND_IN_GCS, filename) for an image file that exists.
             (ERROR_IN_FILENAME, url) if handling an image filename raises.
     """
     if constants.DEV_MODE:
         return

     file_system = fs_domain.AbstractFileSystem(
         fs_domain.GcsFileSystem(exp_model.id))
     # We have to make sure we pass the dir name without starting or
     # ending with '/'.
     image_urls = file_system.listdir('image')
     audio_urls = file_system.listdir('audio')

     for image_url in image_urls:
         image_match = GCS_IMAGE_ID_REGEX.match(image_url)
         if not image_match:
             yield (INVALID_GCS_URL, image_url.encode('utf-8'))
             continue
         try:
             filename = image_match.group(3)
             image_filepath = 'image/%s' % filename.encode('utf-8')
             if file_system.isfile(image_filepath):
                 yield (FILE_FOUND_IN_GCS, filename.encode('utf-8'))
         except Exception:
             yield (ERROR_IN_FILENAME, image_url.encode('utf-8'))

     for audio_url in audio_urls:
         if not GCS_AUDIO_ID_REGEX.match(audio_url):
             yield (INVALID_GCS_URL, audio_url)
Ejemplo n.º 7
0
 def setUp(self):
     """Sets up the test user and a datastore-backed file system for the
     'eid' entity.
     """
     super(DatastoreBackedFileSystemUnitTests, self).setUp()
     self.user_email = '*****@*****.**'
     self.user_id = self.get_user_id_from_email(self.user_email)
     datastore_fs = fs_domain.DatastoreBackedFileSystem(
         fs_domain.ENTITY_TYPE_EXPLORATION, 'eid')
     self.fs = fs_domain.AbstractFileSystem(datastore_fs)
Ejemplo n.º 8
0
    def map(file_model):
        """Copies the image held by a file model into the GcsFileSystem.

        Args:
            file_model: The model providing the instance id and the file
                content.

        Yields:
            tuple. One of:
                ('Invalid filetype', filetype) if the extension is not an
                    allowed image extension.
                (WRONG_INSTANCE_ID, instance_id) if the id does not match
                    the expected pattern.
                (FILE_ALREADY_EXISTS, instance_id) if the image is already
                    present.
                ('Failed to commit file', instance_id) if the image is still
                    absent after the commit.
                (FILE_COPIED, 1) if the image was committed.
        """
        # This job is allowed to run only in Production environment since it
        # uses GcsFileSystem which can't be used in Development environment.
        if feconf.DEV_MODE:
            return

        instance_id = file_model.id
        filetype = instance_id[instance_id.rfind('.') + 1:]
        # To separate the image entries from the audio entries we get from the
        # FileSnapshotContentModel.
        if filetype not in ALLOWED_IMAGE_EXTENSIONS:
            yield ('Invalid filetype', filetype)
            return

        id_match = FILE_MODEL_ID_REGEX.match(instance_id)
        if not id_match:
            yield (WRONG_INSTANCE_ID, instance_id)
            return

        filename = id_match.group(2)
        exploration_id = id_match.group(1)
        content = file_model.content
        image_filepath = 'image/%s' % filename
        gcs_fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(exploration_id))
        if gcs_fs.isfile(image_filepath):
            yield (FILE_ALREADY_EXISTS, file_model.id)
            return

        gcs_fs.commit(
            'ADMIN', image_filepath, content, mimetype='image/%s' % filetype)
        # Re-check the file system to confirm that the commit took effect.
        if gcs_fs.isfile(image_filepath):
            yield (FILE_COPIED, 1)
        else:
            yield ('Failed to commit file', instance_id)
Ejemplo n.º 9
0
    def _convert_state_v32_dict_to_v33_dict(cls, question_id,
                                            question_state_dict):
        """Converts from version 32 to 33. Version 33 adds dimensions to
        images in the oppia-noninteractive-image tags located inside tabs
        and collapsible blocks.

        Args:
            question_id: str. Question id.
            question_state_dict: dict. A dict where each key-value pair
                represents respectively, a state name and a dict used to
                initialize a State domain object.

        Returns:
            dict. The converted question_state_dict.
        """
        question_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_QUESTION, question_id))
        # Bind the question's file system so that the converter only needs
        # to supply the remaining arguments.
        image_dimensions_adder = functools.partial(
            html_validation_service.add_dims_to_img_in_complex_rte,
            question_fs)
        return state_domain.State.convert_html_fields_in_state(
            question_state_dict, image_dimensions_adder)
Ejemplo n.º 10
0
def validate_svg_filenames_in_math_rich_text(entity_type, entity_id,
                                             html_string):
    """Validates the SVG filenames for each math rich-text components and
    returns a list of all invalid math tags in the given HTML.

    A math tag is reported as invalid when its svg_filename is empty, or when
    no file exists at 'image/<svg_filename>' in the entity's file system.

    Args:
        entity_type: str. The type of the entity.
        entity_id: str. The ID of the entity.
        html_string: str. The HTML string.

    Returns:
        list(str). A list of invalid math tags in the HTML string.
    """
    soup = bs4.BeautifulSoup(html_string, 'html.parser')
    error_list = []
    # The file system is the same for every tag, so it is built at most once,
    # and only when a tag with a non-empty filename has to be looked up.
    fs = None
    for math_tag in soup.findAll(name='oppia-noninteractive-math'):
        math_content_dict = json.loads(
            unescape_html(math_tag['math_content-with-value']))
        svg_filename = objects.UnicodeString.normalize(
            math_content_dict['svg_filename'])
        if svg_filename == '':
            error_list.append(python_utils.UNICODE(math_tag))
            continue

        if fs is None:
            file_system_class = fs_services.get_entity_file_system_class()
            fs = fs_domain.AbstractFileSystem(
                file_system_class(entity_type, entity_id))
        if not fs.isfile('image/%s' % svg_filename):
            error_list.append(python_utils.UNICODE(math_tag))
    return error_list
Ejemplo n.º 11
0
    def post(self, entity_type, entity_id):
        """Saves an image uploaded by a content creator.

        Args:
            entity_type: str. The type of the entity.
            entity_id: str. The id of the entity.

        Raises:
            InvalidInputException. The image or filename fails validation,
                or a file with the same name already exists.
        """
        image_data = self.normalized_request.get('image')
        filename = self.normalized_payload.get('filename')
        filename_prefix = self.normalized_payload.get('filename_prefix')

        try:
            file_format = image_validation_services.validate_image_and_filename(
                image_data, filename)
        except utils.ValidationError as e:
            raise self.InvalidInputException(e)

        entity_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(entity_type, entity_id))
        if entity_fs.isfile('%s/%s' % (filename_prefix, filename)):
            raise self.InvalidInputException(
                'A file with the name %s already exists. Please choose a '
                'different name.' % filename)

        fs_services.save_original_and_compressed_versions_of_image(
            filename, entity_type, entity_id, image_data, filename_prefix,
            file_format in feconf.COMPRESSIBLE_IMAGE_FORMATS)

        self.render_json({'filename': filename})
Ejemplo n.º 12
0
    def test_copy_question_images_to_the_correct_storage_path(self):
        """Tests that the question images are copied to the correct storage
        path.
        """
        question_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_QUESTION, self.QUESTION_ID))
        image_filepaths = ['image/img.png', 'image/test_svg.svg']

        # Assert that the storage paths do not exist before the job is run.
        for image_filepath in image_filepaths:
            self.assertFalse(question_fs.isfile(image_filepath))

        # Start the job.
        job_class = question_jobs_one_off.FixQuestionImagesStorageOneOffJob
        job_id = job_class.create_new()
        job_class.enqueue(job_id)
        self.process_and_flush_pending_mapreduce_tasks()

        # Verify that the storage paths exist and the status is reported in the
        # job output.
        for image_filepath in image_filepaths:
            self.assertTrue(question_fs.isfile(image_filepath))

        job_output = [
            ast.literal_eval(output_line)
            for output_line in job_class.get_output(job_id)]
        expected_output = [[
            u'question_image_copied',
            [
                u'2 image paths were fixed for question id question_id '
                u'with linked_skill_ids: '
                u'[u\'skill_id_1\', u\'skill_id_2\']'
            ]
        ]]
        self.assertEqual(expected_output, job_output)
Ejemplo n.º 13
0
    def get(self, exploration_id):
        """Handles GET requests by rendering the file paths listed in the
        exploration's file system.

        Args:
            exploration_id: The id passed to the ExplorationFileSystem.
        """
        exploration_fs = fs_domain.AbstractFileSystem(
            fs_domain.ExplorationFileSystem(exploration_id))
        self.render_json({'filepaths': exploration_fs.listdir('')})
Ejemplo n.º 14
0
    def test_job_with_thumbnail_in_filesystem_logs_success(self):
        """Checks that the job reports a newly added thumbnail size for a
        topic whose thumbnail file is present in the file system.
        """
        self.save_new_topic(
            self.TOPIC_ID, self.albert_id, name='A name',
            abbreviated_name='abbrev', description='description',
            thumbnail_size_in_bytes=None)

        # Save the dummy image to the filesystem to be used as thumbnail.
        svg_path = os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg')
        with python_utils.open_file(svg_path, 'rb', encoding=None) as svg_file:
            raw_image = svg_file.read()
        topic_fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(feconf.ENTITY_TYPE_TOPIC, self.TOPIC_ID))
        thumbnail_filepath = '%s/topic.svg' % (constants.ASSET_TYPE_THUMBNAIL)
        topic_fs.commit(
            thumbnail_filepath, raw_image, mimetype='image/svg+xml')

        # Start migration job on sample topic and run it to completion.
        job_class = topic_jobs_one_off.PopulateTopicThumbnailSizeOneOffJob
        job_id = job_class.create_new()
        job_class.enqueue(job_id)
        self.process_and_flush_pending_mapreduce_tasks()

        job_output = [
            ast.literal_eval(output_line)
            for output_line in job_class.get_output(job_id)]
        self.assertEqual([[u'thumbnail_size_newly_added', 1]], job_output)
        topic_services.delete_topic(self.albert_id, self.TOPIC_ID)
Ejemplo n.º 15
0
    def test_audio_upload_with_non_mp3_file(self):
        """Checks that a FLAC file is stored when it is uploaded while the
        accepted audio extensions swap is active.
        """
        self.login(self.EDITOR_EMAIL)
        csrf_token = self.get_new_csrf_token()

        exploration_fs = fs_domain.AbstractFileSystem(
            fs_services.get_entity_file_system_class()(
                feconf.ENTITY_TYPE_EXPLORATION, '0'))
        audio_filepath = 'audio/%s' % self.TEST_AUDIO_FILE_FLAC

        flac_path = os.path.join(
            feconf.TESTS_DATA_DIR, self.TEST_AUDIO_FILE_FLAC)
        with python_utils.open_file(
            flac_path, mode='rb', encoding=None) as audio_file:
            raw_audio = audio_file.read()

        self.assertFalse(exploration_fs.isfile(audio_filepath))

        upload_url = '%s/0' % self.AUDIO_UPLOAD_URL_PREFIX
        with self.accepted_audio_extensions_swap:
            self.post_json(
                upload_url, {'filename': self.TEST_AUDIO_FILE_FLAC},
                csrf_token=csrf_token,
                upload_files=[
                    ('raw_audio_file', 'unused_filename', raw_audio)])

        self.assertTrue(exploration_fs.isfile(audio_filepath))

        self.logout()
Ejemplo n.º 16
0
def export_to_zip_file(exploration_id, version=None):
    """Returns a ZIP archive of the exploration.

    The archive holds the exploration's YAML representation, named after the
    exploration title, plus every file listed in the exploration's file
    system, stored under 'assets/'.

    Args:
        exploration_id: The id of the exploration to export.
        version: The version of the exploration to fetch. Defaults to None.

    Returns:
        str. The contents of the ZIP archive.
    """
    exploration = get_exploration_by_id(exploration_id, version=version)
    yaml_repr = exploration.to_yaml()

    zip_buffer = StringIO.StringIO()
    with zipfile.ZipFile(
        zip_buffer, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        archive.writestr('%s.yaml' % exploration.title, yaml_repr)

        asset_fs = fs_domain.AbstractFileSystem(
            fs_domain.ExplorationFileSystem(exploration_id))
        for filepath in asset_fs.listdir(''):
            # Currently, the version number of all files is 1, since they are
            # not modifiable post-upload.
            # TODO(sll): When allowing editing of files, implement versioning
            # for them.
            file_contents = asset_fs.get(filepath, version=1)

            str_filepath = 'assets/%s' % filepath
            assert isinstance(str_filepath, str)
            archive.writestr(str_filepath.decode('utf-8'), file_contents)

    return zip_buffer.getvalue()
Ejemplo n.º 17
0
    def test_listdir(self):
        """Checks listdir for the root, a sub-directory, invalid directory
        names, a missing directory and a separate file system.
        """
        committed_files = [
            ('abc.png', 'file_contents'),
            ('abcd.png', 'file_contents_2'),
            ('abc/abcd.png', 'file_contents_3'),
            ('bcd/bcde.png', 'file_contents_4'),
        ]
        for filepath, file_contents in committed_files:
            self.fs.commit(filepath, file_contents)

        self.assertEqual(
            self.fs.listdir(''),
            ['abc.png', 'abc/abcd.png', 'abcd.png', 'bcd/bcde.png'])
        self.assertEqual(self.fs.listdir('abc'), ['abc/abcd.png'])

        # Directory names with a leading or trailing slash are rejected.
        with self.assertRaisesRegexp(IOError, 'Invalid filepath'):
            self.fs.listdir('/abc')

        trailing_slash_error = (
            'The dir_name should not start with /'
            ' or end with / : abc/')
        with self.assertRaisesRegexp(IOError, trailing_slash_error):
            self.fs.listdir('abc/')

        self.assertEqual(self.fs.listdir('fake_dir'), [])

        # A file system for a different entity id lists no files.
        other_fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(feconf.ENTITY_TYPE_EXPLORATION, 'eid2'))
        self.assertEqual(other_fs.listdir('assets'), [])
Ejemplo n.º 18
0
 def setUp(self):
     """Signs up the test user and creates a GCS file system for the 'eid'
     exploration.
     """
     super(GcsFileSystemUnitTests, self).setUp()
     self.USER_EMAIL = '*****@*****.**'
     self.signup(self.USER_EMAIL, 'username')
     self.user_id = self.get_user_id_from_email(self.USER_EMAIL)
     gcs_fs = fs_domain.GcsFileSystem(feconf.ENTITY_TYPE_EXPLORATION, 'eid')
     self.fs = fs_domain.AbstractFileSystem(gcs_fs)
Ejemplo n.º 19
0
    def post(self, exploration_id):
        """Saves an image uploaded by a content creator.

        Args:
            exploration_id: The id of the exploration the image belongs to.

        Raises:
            InvalidInputException. The image or filename is missing or
                invalid, or a file with the same name already exists.
        """
        raw = self.request.get('image')
        filename = self.payload.get('filename')
        if not raw:
            raise self.InvalidInputException('No image supplied')

        accepted_formats = feconf.ACCEPTED_IMAGE_FORMATS_AND_EXTENSIONS
        allowed_formats = ', '.join(accepted_formats.keys())

        # Verify that the data is recognized as an image.
        file_format = imghdr.what(None, h=raw)
        if file_format not in accepted_formats:
            raise self.InvalidInputException('Image not recognized')

        # Verify that the file type matches the supplied extension.
        if not filename:
            raise self.InvalidInputException('No filename supplied')
        if filename.rfind('.') == 0:
            raise self.InvalidInputException('Invalid filename')
        if '/' in filename or '..' in filename:
            raise self.InvalidInputException(
                'Filenames should not include slashes (/) or consecutive dot '
                'characters.')
        if '.' not in filename:
            raise self.InvalidInputException(
                'Image filename with no extension: it should have '
                'one of the following extensions: %s.' % allowed_formats)

        extension = filename[filename.rfind('.') + 1:].lower()
        if extension not in accepted_formats[file_format]:
            raise self.InvalidInputException(
                'Expected a filename ending in .%s, received %s' %
                (file_format, filename))

        # Image files are stored to the datastore in the dev env, and to GCS
        # in production.
        use_datastore = (
            feconf.DEV_MODE or not constants.ENABLE_GCS_STORAGE_FOR_IMAGES)
        if use_datastore:
            file_system_class = fs_domain.ExplorationFileSystem
        else:
            file_system_class = fs_domain.GcsFileSystem
        fs = fs_domain.AbstractFileSystem(file_system_class(exploration_id))
        if use_datastore:
            filepath = filename
        else:
            filepath = '%s/%s' % (self._FILENAME_PREFIX, filename)

        if fs.isfile(filepath):
            raise self.InvalidInputException(
                'A file with the name %s already exists. Please choose a '
                'different name.' % filename)

        exp_services.save_original_and_compressed_versions_of_image(
            self.user_id, filename, exploration_id, raw)

        self.render_json({'filepath': filename})
Ejemplo n.º 20
0
def save_original_and_compressed_versions_of_image(
        filename, entity_type, entity_id, original_image_content,
        filename_prefix, image_is_compressible):
    """Saves the three versions of the image file.

    The original, '_compressed' and '_micro' versions are each committed
    under filename_prefix, unless a file already exists at that path.

    Args:
        filename: str. The name of the image file.
        entity_type: str. The type of the entity.
        entity_id: str. The id of the entity.
        original_image_content: str. The content of the original image.
        filename_prefix: str. The string to prefix to the filename.
        image_is_compressible: bool. Whether the image can be compressed or
            not.
    """
    filepath = '%s/%s' % (filename_prefix, filename)

    filename_wo_filetype = filename[:filename.rfind('.')]
    filetype = filename[filename.rfind('.') + 1:]

    compressed_image_filepath = '%s/%s_compressed.%s' % (
        filename_prefix, filename_wo_filetype, filetype)
    micro_image_filepath = '%s/%s_micro.%s' % (
        filename_prefix, filename_wo_filetype, filetype)

    # 'image/svg' is not a registered media type, so SVG files are committed
    # with 'image/svg+xml' instead of the extension-derived value.
    mimetype = (
        'image/svg+xml' if filetype == 'svg' else 'image/%s' % filetype)

    file_system_class = get_entity_file_system_class()
    fs = fs_domain.AbstractFileSystem(file_system_class(
        entity_type, entity_id))

    if image_is_compressible:
        compressed_image_content = gae_image_services.compress_image(
            original_image_content, 0.8)
        micro_image_content = gae_image_services.compress_image(
            original_image_content, 0.7)
    else:
        # Images that cannot be compressed are stored unchanged under all
        # three names.
        compressed_image_content = original_image_content
        micro_image_content = original_image_content

    # Because in case of CreateVersionsOfImageJob, the original image is
    # already there. Also, even if the compressed, micro versions for some
    # image exists, then this would prevent from creating another copy of
    # the same.
    versions_to_save = (
        (filepath, original_image_content),
        (compressed_image_filepath, compressed_image_content),
        (micro_image_filepath, micro_image_content),
    )
    for image_filepath, image_content in versions_to_save:
        encoded_filepath = image_filepath.encode('utf-8')
        if not fs.isfile(encoded_filepath):
            fs.commit(encoded_filepath, image_content, mimetype=mimetype)
Ejemplo n.º 21
0
    def test_independence_of_file_systems(self):
        """Checks that a file committed to one exploration file system is not
        found through a file system created for another id.
        """
        self.fs.commit(self.user_id, 'abc.png', 'file_contents')
        self.assertEqual(self.fs.get('abc.png'), 'file_contents')

        other_exploration_fs = fs_domain.ExplorationFileSystem('eid2')
        other_fs = fs_domain.AbstractFileSystem(other_exploration_fs)
        with self.assertRaisesRegexp(IOError, r'File abc\.png .* not found'):
            other_fs.get('abc.png')
Ejemplo n.º 22
0
    def test_independence_of_file_systems(self):
        """Checks that a file committed to one datastore-backed file system
        is not found through a file system created for another entity id.
        """
        self.fs.commit(self.user_id, 'abc.png', 'file_contents')
        self.assertEqual(self.fs.get('abc.png'), 'file_contents')

        other_datastore_fs = fs_domain.DatastoreBackedFileSystem(
            feconf.ENTITY_TYPE_EXPLORATION, 'eid2')
        other_fs = fs_domain.AbstractFileSystem(other_datastore_fs)
        with self.assertRaisesRegexp(IOError, r'File abc\.png .* not found'):
            other_fs.get('abc.png')
Ejemplo n.º 23
0
    def test_unexpected_content_rule(self):
        """Checks that HasUnexpectedContent evaluates to True for the
        incorrect-contents archive and to False for the good archive.
        """
        test_data_fs = fs_domain.AbstractFileSystem(
            fs_domain.DiskBackedFileSystem(TEST_DATA_DIR))

        canonical_data_dir = os.path.join(TEST_DATA_DIR, 'canonical')
        canonical_fs = fs_domain.AbstractFileSystem(
            fs_domain.DiskBackedFileSystem(canonical_data_dir))

        rule = tar_file_string.HasUnexpectedContent(
            ['hello.c', 'Makefile']).set_fs(canonical_fs)

        incorrect_archive = base64.b64encode(
            test_data_fs.get('incorrect-contents.tar.gz', mode='rb'))
        self.assertTrue(rule.eval(incorrect_archive))

        good_archive = base64.b64encode(
            test_data_fs.get('good.tar.gz', mode='rb'))
        self.assertFalse(rule.eval(good_archive))
Ejemplo n.º 24
0
 def test_copy(self):
     """Checks that copy makes a file from the source assets path available
     in the destination file system.
     """
     self.fs.commit('abc2.png', 'file_contents')
     self.assertEqual(self.fs.listdir(''), ['abc2.png'])

     question_gcs_fs = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_QUESTION, 'question_id1')
     destination_fs = fs_domain.AbstractFileSystem(question_gcs_fs)
     self.assertEqual(destination_fs.listdir(''), [])

     source_assets_path = self.fs.impl.assets_path
     destination_fs.copy(source_assets_path, 'abc2.png')
     self.assertTrue(destination_fs.isfile('abc2.png'))
Ejemplo n.º 25
0
def classify(exp_id, state, answer, params):
    """Normalizes the answer and picks the answer group it best belongs to.

    Hard rules are tried first, then soft rules, then (if the interaction is
    string-classifier trainable and the feature is enabled) the string
    classifier. A match is only accepted when its certainty reaches
    feconf.DEFAULT_ANSWER_GROUP_CLASSIFICATION_THRESHOLD; otherwise the
    default outcome of the state's interaction is used.

    Returns:
        dict. A dict with the following keys:
            'outcome': A dict representing the outcome of the answer group
                matched. When the default rule is matched, this is the
                default_outcome of the state's interaction.
            'answer_group_index': An index into the answer groups list
                indicating which group was selected. This is equal to the
                number of answer groups if the default outcome was matched.
            'classification_certainty': A normalized value within the range
                of [0, 1] representing the confidence that the answer
                belongs in the chosen answer group. A certainty of 1 means
                it is the best possible match. A certainty of 0 means it is
                matched to the default outcome.
            'rule_spec_index': An index into the rule specs list of the
                matched answer group indicating which rule spec was
                matched. This is equal to 0 if the default outcome is
                selected.

    Raises:
        Exception: No answer group matched above the threshold and the
            interaction has no default outcome.
    """
    registry = interaction_registry.Registry
    interaction_instance = registry.get_interaction_by_id(
        state.interaction.id)
    normalized_answer = interaction_instance.normalize_answer(answer)

    exp_fs = fs_domain.AbstractFileSystem(
        fs_domain.ExplorationFileSystem(exp_id))
    input_type = interaction_instance.answer_type

    result = classify_hard_rule(
        state, params, input_type, normalized_answer, exp_fs)
    if result is None:
        result = classify_soft_rule(
            state, params, input_type, normalized_answer, exp_fs)

    use_string_classifier = (
        interaction_instance.is_string_classifier_trainable
        and feconf.ENABLE_STRING_CLASSIFIER
        and result is None)
    if use_string_classifier:
        result = classify_string_classifier_rule(state, normalized_answer)

    # The best matched group must match above a certain threshold. If no
    # group meets this requirement, the default 'group' automatically
    # matches, so the outcome of the answer is the default outcome of the
    # state.
    if result is not None:
        certainty = result['classification_certainty']
        if certainty >= feconf.DEFAULT_ANSWER_GROUP_CLASSIFICATION_THRESHOLD:
            return result

    if state.interaction.default_outcome is None:
        raise Exception(
            'Something has seriously gone wrong with the exploration. Oppia '
            'does not know what to do with this answer. Please contact the '
            'exploration owner.')

    return {
        'outcome': state.interaction.default_outcome.to_dict(),
        'answer_group_index': len(state.interaction.answer_groups),
        'classification_certainty': 0.0,
        'rule_spec_index': 0
    }
Ejemplo n.º 26
0
    def post(self, entity_type, entity_id):
        """Saves an image uploaded by a content creator.

        The image bytes are read from the 'image' request field; 'filename'
        and the optional 'filename_prefix' are read from the payload. On
        success, a JSON response containing the filename is rendered.

        Args:
            entity_type: The entity type, forwarded to the file system class
                and to the image-saving service.
            entity_id: The entity id, forwarded to the file system class and
                to the image-saving service.

        Raises:
            InvalidInputException: No image or filename was supplied, the
                data is not a recognized image, the filename is malformed,
                its extension does not match the detected format, or a file
                with that name already exists.
        """
        raw = self.request.get('image')
        filename = self.payload.get('filename')
        filename_prefix = self.payload.get('filename_prefix')
        if filename_prefix is None:
            filename_prefix = self._FILENAME_PREFIX
        if not raw:
            raise self.InvalidInputException('No image supplied')

        accepted = feconf.ACCEPTED_IMAGE_FORMATS_AND_EXTENSIONS
        allowed_formats = ', '.join(accepted.keys())

        # Verify that the data is recognized as an image.
        file_format = imghdr.what(None, h=raw)
        if file_format not in accepted:
            raise self.InvalidInputException('Image not recognized')

        # Verify that the file type matches the supplied extension.
        if not filename:
            raise self.InvalidInputException('No filename supplied')
        if filename.rfind('.') == 0:
            # The only dot is the leading character, e.g. '.png'.
            raise self.InvalidInputException('Invalid filename')
        if any(token in filename for token in ('/', '..')):
            raise self.InvalidInputException(
                'Filenames should not include slashes (/) or consecutive '
                'dot characters.')
        if '.' not in filename:
            raise self.InvalidInputException(
                'Image filename with no extension: it should have '
                'one of the following extensions: %s.' % allowed_formats)

        # Everything after the last dot, compared case-insensitively.
        extension = filename.rpartition('.')[2].lower()
        if extension not in accepted[file_format]:
            raise self.InvalidInputException(
                'Expected a filename ending in .%s, received %s' %
                (file_format, filename))

        backend_class = fs_services.get_entity_file_system_class()
        entity_fs = fs_domain.AbstractFileSystem(
            backend_class(entity_type, entity_id))

        # Refuse to overwrite an existing file.
        if entity_fs.isfile('%s/%s' % (filename_prefix, filename)):
            raise self.InvalidInputException(
                'A file with the name %s already exists. Please choose a '
                'different name.' % filename)

        fs_services.save_original_and_compressed_versions_of_image(
            filename, entity_type, entity_id, raw, filename_prefix)

        self.render_json({'filename': filename})
Ejemplo n.º 27
0
 def setUp(self):
     """Creates the file system and the sample classifier data stored on
     the test instance.
     """
     super(FileSystemClassifierDataTests, self).setUp()

     gcs_backend = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_EXPLORATION, 'exp_id')
     self.fs = fs_domain.AbstractFileSystem(gcs_backend)

     # Sample data with a scalar, a list of floats and a nested dict.
     submodel = {'param1': 12}
     self.classifier_data = {
         'param1': 40,
         'param2': [34.2, 54.13, 95.23],
         'submodel': submodel
     }
Ejemplo n.º 28
0
    def map(exp_model):
        """Re-saves the images and audio files of one exploration.

        Does nothing when feconf.DEV_MODE is set. Otherwise every image
        listed under 'image' is re-saved, under a filename regenerated from
        its dimensions, through
        exp_services.save_original_and_compressed_versions_of_image, and
        every audio file listed under 'audio' is committed to a file system
        created for 'exploration/<exp_id>'.

        Args:
            exp_model: The exploration model; only its id is read.

        Yields:
            tuple. (ERROR_IN_FILENAME, image_filename) for each image whose
            processing raised an exception, followed by a single
            (ADDED_COMPRESSED_VERSIONS_OF_IMAGES, exp_id).
        """
        if feconf.DEV_MODE:
            return

        exp_id = exp_model.id
        old_fs = fs_domain.AbstractFileSystem(fs_domain.GcsFileSystem(exp_id))
        # We have to make sure we pass the dir name without starting or
        # ending with '/'.
        image_urls = old_fs.listdir('image')
        audio_urls = old_fs.listdir('audio')

        # The filename is the third group of the respective URL regex.
        image_filenames = [
            GCS_IMAGE_ID_REGEX.match(image_url).group(3)
            for image_url in image_urls]
        audio_filenames = [
            GCS_AUDIO_ID_REGEX.match(audio_url).group(3)
            for audio_url in audio_urls]

        for image_filename in image_filenames:
            try:
                raw_image = old_fs.get('image/%s' % image_filename)
                height, width = gae_image_services.get_image_dimensions(
                    raw_image)
                regenerate_filename = (
                    html_validation_service.
                    regenerate_image_filename_using_dimensions)  # pylint: disable=line-too-long
                new_filename = regenerate_filename(
                    image_filename, height, width)
                exp_services.save_original_and_compressed_versions_of_image(  # pylint: disable=line-too-long
                    'ADMIN', new_filename, exp_id, raw_image)
            except Exception:
                # A failing image is reported and the job moves on.
                yield (ERROR_IN_FILENAME, image_filename)

        for audio_filename in audio_filenames:
            # Text after the last dot (the whole name if there is no dot).
            extension = audio_filename.rpartition('.')[2]
            raw_audio = old_fs.get('audio/%s' % audio_filename)
            new_fs = fs_domain.AbstractFileSystem(
                fs_domain.GcsFileSystem('exploration/%s' % exp_id))
            new_fs.commit(
                'Admin', 'audio/%s' % audio_filename, raw_audio,
                mimetype='audio/%s' % extension)

        yield (ADDED_COMPRESSED_VERSIONS_OF_IMAGES, exp_id)
Ejemplo n.º 29
0
def delete_classifier_data(exp_id, job_id):
    """Removes the classifier data file of a training job.

    The file is named '<job_id>-classifier-data.json' and is deleted from
    the exploration's file system.

    Args:
        exp_id: str. The id of the exploration.
        job_id: str. The id of the classifier training job model.
    """
    data_filepath = '%s-classifier-data.json' % job_id
    backend_class = get_entity_file_system_class()
    backend = backend_class(feconf.ENTITY_TYPE_EXPLORATION, exp_id)
    fs_domain.AbstractFileSystem(backend).delete(data_filepath)
Ejemplo n.º 30
0
    def get(self, page_context, page_identifier, asset_type, encoded_filename):
        """Returns an asset file.

        Args:
            page_context: str. The context of the page where the asset is
                required.
            page_identifier: str. The unique identifier for the particular
                context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
            asset_type: str. Type of the asset, either image or audio.
            encoded_filename: str. The asset filename. This
              string is encoded in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException: The handler is used outside DEV_MODE, or
                any error (including an unsupported page_context) occurs
                while locating or reading the asset.
        """
        if not constants.DEV_MODE:
            raise self.PageNotFoundException

        try:
            filename = python_utils.urllib_unquote(encoded_filename)
            file_format = filename[(filename.rfind('.') + 1):]

            # If the following is not converted to bytes, an error occurs in
            # the wsgi library because unicode gets used.
            self.response.headers[
                'Content-Type'] = python_utils.convert_to_bytes(
                    '%s/%s' % (asset_type, file_format))

            if page_context == feconf.ENTITY_TYPE_SUBTOPIC:
                # Subtopic assets are stored under the parent topic, which
                # is looked up by name.
                entity_type = feconf.ENTITY_TYPE_TOPIC
                topic = topic_fetchers.get_topic_by_name(page_identifier)
                entity_id = topic.id
            elif page_context in (
                    feconf.ENTITY_TYPE_EXPLORATION,
                    feconf.ENTITY_TYPE_SKILL,
                    feconf.ENTITY_TYPE_TOPIC,
                    feconf.ENTITY_TYPE_STORY):
                entity_type = page_context
                entity_id = page_identifier
            else:
                raise self.InvalidInputException

            fs = fs_domain.AbstractFileSystem(
                fs_domain.DatastoreBackedFileSystem(entity_type, entity_id))
            raw = fs.get('%s/%s' % (asset_type, filename))

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.write(raw)
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # BaseException subclasses such as KeyboardInterrupt and
            # SystemExit propagate instead of being reported as a 404.
            raise self.PageNotFoundException