Ejemplo n.º 1
0
    def map(exp_model):
        """Copies the images and audio files of one exploration in GCS.

        Each image is read from the file system rooted at the exploration
        id, renamed so that the filename carries its dimensions, and handed
        to exp_services.save_original_and_compressed_versions_of_image.
        Each audio file is committed, unchanged, to the file system rooted
        at 'exploration/<exp_id>'.

        Args:
            exp_model: object. The model being mapped over. Only its id is
                read here.

        Yields:
            tuple. ADDED_COMPRESSED_VERSIONS_OF_IMAGES paired with the
            exploration id, once every file has been processed. Nothing is
            yielded when feconf.DEV_MODE is on.
        """
        if feconf.DEV_MODE:
            return

        exp_id = exp_model.id
        fs_old = fs_domain.AbstractFileSystem(fs_domain.GcsFileSystem(exp_id))
        # We have to make sure we pass the dir name without starting or
        # ending with '/'.
        image_urls = fs_old.listdir('image')
        audio_urls = fs_old.listdir('audio')

        # Both lists are built before anything is written. A URL for which
        # match() returns None fails here, before any file is copied.
        image_filenames = [
            GCS_IMAGE_ID_REGEX.match(url).group(3) for url in image_urls]
        audio_filenames = [
            GCS_AUDIO_ID_REGEX.match(url).group(3) for url in audio_urls]

        for image_filename in image_filenames:
            raw_image = fs_old.get('image/%s' % image_filename)
            height, width = gae_image_services.get_image_dimensions(
                raw_image)
            filename_with_dimensions = (
                html_validation_service.regenerate_image_filename_using_dimensions( # pylint: disable=line-too-long
                    image_filename, height, width))
            exp_services.save_original_and_compressed_versions_of_image(
                'ADMIN', filename_with_dimensions, exp_id, raw_image)

        # The destination is the same for every audio file, so it is built
        # once instead of once per file.
        fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem('exploration/%s' % exp_id))
        for audio_filename in audio_filenames:
            # Everything after the last '.' is used as the audio subtype.
            filetype = audio_filename[audio_filename.rfind('.') + 1:]
            raw_data = fs_old.get('audio/%s' % audio_filename)
            fs.commit(
                'Admin', 'audio/%s' % audio_filename,
                raw_data, mimetype='audio/%s' % filetype)

        yield (ADDED_COMPRESSED_VERSIONS_OF_IMAGES, exp_id)
Ejemplo n.º 2
0
    def map(exp_model):
        """Copies the images and audio files of one exploration in GCS.

        Images are read from the file system rooted at the exploration id,
        renamed so that the filename carries the image dimensions, and
        handed to exp_services.save_original_and_compressed_versions_of_image.
        A failure on one image is logged and reported, and the remaining
        images are still processed. Audio files are committed, unchanged,
        to the file system rooted at 'exploration/<exp_id>'.

        Args:
            exp_model: object. The model being mapped over. Only its id is
                read here.

        Yields:
            tuple. One of:
            - (EXP_REFERENCES_UNICODE_FILES, str) when reading any image by
              its un-encoded name fails.
            - ('Copied file', 1) for each image that was copied.
            - (ERROR_IN_FILENAME, str) for each image that failed, with the
              traceback included in the message.
            - (ADDED_COMPRESSED_VERSIONS_OF_IMAGES, exp_id) at the end.
            Nothing is yielded when constants.DEV_MODE is on.
        """
        if constants.DEV_MODE:
            return

        exp_id = exp_model.id
        fs_old = fs_domain.AbstractFileSystem(fs_domain.GcsFileSystem(exp_id))
        # We have to make sure we pass the dir name without starting or
        # ending with '/'.
        image_urls = fs_old.listdir('image')
        audio_urls = fs_old.listdir('audio')

        image_filenames = [
            GCS_IMAGE_ID_REGEX.match(url).group(3) for url in image_urls]
        audio_filenames = [
            GCS_AUDIO_ID_REGEX.match(url).group(3) for url in audio_urls]

        # Probe the images using their un-encoded names. The first failure
        # is taken as a sign that the exploration references files whose
        # names need encoding, and the probing stops there.
        references_unicode_files = False
        for image_filename in image_filenames:
            try:
                fs_old.get('image/%s' % image_filename)
            except Exception:
                references_unicode_files = True
                break

        if references_unicode_files:
            image_filenames_str = '%s' % image_filenames
            yield (
                EXP_REFERENCES_UNICODE_FILES,
                'Exp: %s, image filenames: %s' % (
                    exp_id, image_filenames_str.encode('utf-8')))

        for image_filename in image_filenames:
            try:
                raw_image = fs_old.get(
                    'image/%s' % image_filename.encode('utf-8'))
                height, width = gae_image_services.get_image_dimensions(
                    raw_image)
                filename_with_dimensions = (
                    html_validation_service.regenerate_image_filename_using_dimensions( # pylint: disable=line-too-long
                        image_filename, height, width))
                exp_services.save_original_and_compressed_versions_of_image( # pylint: disable=line-too-long
                    'ADMIN', filename_with_dimensions, exp_id, raw_image)
                yield ('Copied file', 1)
            except Exception:
                # One bad image must not stop the others from being copied,
                # so the failure is logged and reported, not re-raised.
                error = traceback.format_exc()
                logging.error(
                    'File %s in %s failed migration: %s' %
                    (image_filename.encode('utf-8'), exp_id, error))
                yield (
                    ERROR_IN_FILENAME, 'Error when copying %s in %s: %s' % (
                        image_filename.encode('utf-8'), exp_id, error))

        # The destination is the same for every audio file, so it is built
        # once instead of once per file.
        fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem('exploration/%s' % exp_id))
        for audio_filename in audio_filenames:
            # Everything after the last '.' is used as the audio subtype.
            filetype = audio_filename[audio_filename.rfind('.') + 1:]
            raw_data = fs_old.get('audio/%s' % audio_filename)
            fs.commit(
                'Admin', 'audio/%s' % audio_filename,
                raw_data, mimetype='audio/%s' % filetype)

        yield (ADDED_COMPRESSED_VERSIONS_OF_IMAGES, exp_id)
Ejemplo n.º 3
0
    def map(file_model):
        """Reports whether the GCS copies of one image file are present.

        The original image is read from the file system rooted at the
        exploration id. The dimension-based filename is derived from it,
        and the presence of that file and of its '_compressed' and '_micro'
        variants is checked in the file system rooted at
        'exploration/<exploration_id>'.

        Args:
            file_model: object. The model being mapped over. Only its id is
                read here.

        Yields:
            tuple. ('Invalid filetype', filetype) when the id does not end
            in an allowed image extension, and (WRONG_INSTANCE_ID, id) when
            the id does not match FILE_MODEL_ID_REGEX. Otherwise one status
            tuple for the files, optionally followed by a tuple flagging a
            missing or deleted exploration. Nothing is yielded when
            feconf.DEV_MODE is on.
        """
        # This job is allowed to run only in Production environment since it
        # uses GcsFileSystem which can't be used in Development environment.
        if feconf.DEV_MODE:
            return

        instance_id = file_model.id
        filetype = instance_id[instance_id.rfind('.') + 1:]
        # To separate the image entries from the audio entries we get from
        # the FileSnapshotContentModel.
        if filetype not in ALLOWED_IMAGE_EXTENSIONS:
            yield ('Invalid filetype', filetype)
            return

        catched_groups = FILE_MODEL_ID_REGEX.match(instance_id)
        if not catched_groups:
            yield (WRONG_INSTANCE_ID, instance_id)
            return

        filename = catched_groups.group(2)
        exploration_id = catched_groups.group(1)
        fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem('exploration/%s' % exploration_id))
        fs_old = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(exploration_id))

        raw_image = fs_old.get('image/%s' % filename)
        height, width = gae_image_services.get_image_dimensions(raw_image)
        filename_with_dimensions = (
            html_validation_service.
            regenerate_image_filename_using_dimensions(  # pylint: disable=line-too-long
                filename, height, width))

        # Split the new filename once, at its last '.'.
        last_dot_index = filename_with_dimensions.rfind('.')
        filename_wo_filetype = filename_with_dimensions[:last_dot_index]
        filetype = filename_with_dimensions[last_dot_index + 1:]

        filepath = 'image/%s' % filename_with_dimensions
        compressed_image_filepath = (
            'image/%s_compressed.%s' % (filename_wo_filetype, filetype))
        micro_image_filepath = (
            'image/%s_micro.%s' % (filename_wo_filetype, filetype))

        exp_model = exp_models.ExplorationModel.get(
            exploration_id, strict=False)

        # Only the first missing variant is reported.
        if not fs.isfile(filepath):
            yield (FILE_IS_NOT_IN_GCS, instance_id)
        elif not fs.isfile(compressed_image_filepath):
            yield ('Compressed file not in GCS', instance_id)
        elif not fs.isfile(micro_image_filepath):
            yield ('Micro file not in GCS', instance_id)
        else:
            yield (FILE_FOUND_IN_GCS, 1)

        if not exp_model:
            yield (FILE_REFERENCES_NON_EXISTENT_EXP_KEY, instance_id)
        elif exp_model.deleted:
            yield (FILE_REFERENCES_DELETED_EXP_KEY, instance_id)
Ejemplo n.º 4
0
    def test_validate_entity_parameters(self):
        """Checks that GcsFileSystem rejects invalid entity names and ids."""
        # Each row: (expected error pattern, entity name, entity id).
        invalid_cases = [
            ('Invalid entity_id received: 1',
             feconf.ENTITY_TYPE_EXPLORATION, 1),
            ('Entity id cannot be empty',
             feconf.ENTITY_TYPE_EXPLORATION, ''),
            ('Invalid entity_name received: invalid_name.',
             'invalid_name', 'exp_id'),
        ]

        for expected_error, entity_name, entity_id in invalid_cases:
            with self.assertRaisesRegexp(
                    utils.ValidationError, expected_error):
                fs_domain.GcsFileSystem(entity_name, entity_id)
Ejemplo n.º 5
0
    def map(exp_model):
        """Reports whether each old image of an exploration has GCS copies.

        Every URL listed under 'image' in the file system rooted at the
        exploration id is parsed with GCS_IMAGE_ID_REGEX. For each valid
        one, the dimension-based filename is derived from the image content
        and the presence of that file and of its '_compressed' and '_micro'
        variants is checked in the file system rooted at
        'exploration/<exp_id>'.

        Args:
            exp_model: object. The model being mapped over. Only its id is
                read here.

        Yields:
            tuple. For each URL, one of (INVALID_GCS_URL, url),
            (ERROR_IN_FILENAME, url), (FILE_IS_NOT_IN_GCS, url),
            ('Compressed file not in GCS', url),
            ('Micro file not in GCS', url) or (FILE_FOUND_IN_GCS, 1). URLs
            are UTF-8 encoded. Nothing is yielded when feconf.DEV_MODE is
            on.
        """
        # This job is allowed to run only in Production environment since it
        # uses GcsFileSystem which can't be used in Development environment.
        if feconf.DEV_MODE:
            return

        exp_id = exp_model.id
        fs_old = fs_domain.AbstractFileSystem(fs_domain.GcsFileSystem(exp_id))
        fs_new = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem('exploration/%s' % exp_id))
        # We have to make sure we pass the dir name without starting or
        # ending with '/'.
        image_urls = fs_old.listdir('image')
        for url in image_urls:
            catched_groups = GCS_IMAGE_ID_REGEX.match(url)
            if not catched_groups:
                yield (INVALID_GCS_URL, url.encode('utf-8'))
                continue

            try:
                filename = catched_groups.group(3)
            except Exception:
                yield (ERROR_IN_FILENAME, url.encode('utf-8'))
                # There is no filename to check for this URL. Carrying on
                # would hit an unbound name on the first URL, or silently
                # reuse the filename of the previous URL.
                continue

            raw_image = fs_old.get('image/%s' % filename.encode('utf-8'))
            height, width = gae_image_services.get_image_dimensions(
                raw_image)
            filename_with_dimensions = (
                html_validation_service.
                regenerate_image_filename_using_dimensions(  # pylint: disable=line-too-long
                    filename, height, width))

            # Split the new filename once, at its last '.'.
            last_dot_index = filename_with_dimensions.rfind('.')
            filename_wo_filetype = filename_with_dimensions[:last_dot_index]
            filetype = filename_with_dimensions[last_dot_index + 1:]

            filepath = 'image/%s' % filename_with_dimensions
            compressed_image_filepath = (
                'image/%s_compressed.%s' % (filename_wo_filetype, filetype))
            micro_image_filepath = (
                'image/%s_micro.%s' % (filename_wo_filetype, filetype))

            # Only the first missing variant is reported.
            if not fs_new.isfile(filepath.encode('utf-8')):
                yield (FILE_IS_NOT_IN_GCS, url.encode('utf-8'))
            elif not fs_new.isfile(
                    compressed_image_filepath.encode('utf-8')):
                yield ('Compressed file not in GCS', url.encode('utf-8'))
            elif not fs_new.isfile(micro_image_filepath.encode('utf-8')):
                yield ('Micro file not in GCS', url.encode('utf-8'))
            else:
                yield (FILE_FOUND_IN_GCS, 1)
Ejemplo n.º 6
0
    def map(exp_model):
        """Checks that every internal image also exists in the external folder.

        Filenames found under 'image' in the file system rooted at the
        exploration id are converted to their dimension-based names and
        compared with the filenames found under 'image' in the file system
        rooted at 'exploration/<exp_id>'.

        Args:
            exp_model: object. The model being mapped over. Its deleted
                flag and id are read here.

        Yields:
            tuple. (exp_id, 'Missing Images: [...]') when some converted
            names are absent from the external folder, otherwise
            (ALL_IMAGES_VERIFIED, number of external filenames). Nothing is
            yielded for deleted models or when constants.DEV_MODE is on.
        """
        if exp_model.deleted:
            return

        if constants.DEV_MODE:
            return

        exp_id = exp_model.id
        fs_internal = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(exp_id))
        fs_external = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem('exploration/' + exp_id))
        # We have to make sure we pass the dir name without starting or
        # ending with '/'.
        image_urls_internal = fs_internal.listdir('image')
        image_urls_external = fs_external.listdir('image')

        image_filenames_internal = set(
            GCS_IMAGE_ID_REGEX.match(url).group(3)
            for url in image_urls_internal)
        image_filenames_external = set(
            GCS_EXTERNAL_IMAGE_ID_REGEX.match(url).group(3)
            for url in image_urls_external)

        image_filenames_internal_with_dimensions = set()
        for image_name in image_filenames_internal:
            raw_image = fs_internal.get(
                'image/%s' % image_name.encode('utf-8'))
            height, width = gae_image_services.get_image_dimensions(
                raw_image)
            renamed_image = (
                html_validation_service.
                regenerate_image_filename_using_dimensions(  # pylint: disable=line-too-long
                    image_name, height, width))
            image_filenames_internal_with_dimensions.add(renamed_image)

        # Currently in editor.py and exp_services.py, all images are stored
        # in exploration/{{exp_id}}/ external folder. So, we only need to
        # check if all images in the internal folder are there in the
        # external folder.
        non_existent_images = (
            image_filenames_internal_with_dimensions -
            image_filenames_external)

        if not non_existent_images:
            yield (ALL_IMAGES_VERIFIED, len(image_filenames_external))
        else:
            yield (exp_id, 'Missing Images: %s' % list(non_existent_images))
Ejemplo n.º 7
0
 def map(file_snapshot_content_model):
     """Copies the image behind one file snapshot into GCS, if missing.

     The unversioned instance id is expected to look like
     '/<exploration_id>/assets/<name>.<ext>', where <ext> is one of
     ALLOWED_IMAGE_EXTENSIONS. Ids with any other extension are ignored.

     Args:
         file_snapshot_content_model: object. The model being mapped over.
             Only get_unversioned_instance_id() is called on it.

     Yields:
         tuple. One of (WRONG_INSTANCE_ID, instance_id),
         (FOUND_DELETED_FILE, instance_id),
         (FILE_ALREADY_EXISTS, file model id) or (FILE_COPIED, 1). Nothing
         is yielded when feconf.DEV_MODE is on.
     """
     # This job is allowed to run only in Production environment since it
     # uses GcsFileSystem which can't be used in Development environment.
     if feconf.DEV_MODE:
         return

     instance_id = (
         file_snapshot_content_model.get_unversioned_instance_id())
     filetype = instance_id[instance_id.rfind('.') + 1:]
     # To separate the image entries from the audio entries we get from
     # the FileSnapshotContentModel.
     if filetype not in ALLOWED_IMAGE_EXTENSIONS:
         return

     pattern = re.compile(r'^/([^/]+)/assets/(([^/]+)\.(' +
                          '|'.join(ALLOWED_IMAGE_EXTENSIONS) +
                          '))$')
     catched_groups = pattern.match(instance_id)
     if not catched_groups:
         yield (WRONG_INSTANCE_ID, instance_id)
         return

     filename = catched_groups.group(2)
     filepath = 'assets/' + filename
     exploration_id = catched_groups.group(1)
     file_model = file_models.FileModel.get_model(
         exploration_id, filepath, False)
     if not file_model:
         # No model was returned, so there is no file_model.id to read.
         # The instance id identifies the missing file instead.
         yield (FOUND_DELETED_FILE, instance_id)
         return

     content = file_model.content
     fs = fs_domain.AbstractFileSystem(
         fs_domain.GcsFileSystem(exploration_id))
     if fs.isfile('image/%s' % filename):
         yield (FILE_ALREADY_EXISTS, file_model.id)
     else:
         fs.commit('ADMIN', 'image/%s' % filename, content,
                   'image/%s' % filetype)
         yield (FILE_COPIED, 1)
Ejemplo n.º 8
0
 def test_copy_images(self):
     """Checks that copy_images copies all three versions of an image."""
     source_image_path = os.path.join(feconf.TESTS_DATA_DIR, 'img.png')
     with utils.open_file(source_image_path, 'rb', encoding=None) as f:
         original_image_content = f.read()
     fs_services.save_original_and_compressed_versions_of_image(
         self.FILENAME, 'exploration', self.EXPLORATION_ID,
         original_image_content, 'image', True)

     destination_impl = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_QUESTION, 'question_id1')
     destination_fs = fs_domain.AbstractFileSystem(destination_impl)
     expected_filepaths = [
         'image/%s' % image_filename for image_filename in (
             self.FILENAME,
             self.COMPRESSED_IMAGE_FILENAME,
             self.MICRO_IMAGE_FILENAME)]

     # None of the versions exists at the destination before the copy.
     for image_filepath in expected_filepaths:
         self.assertFalse(destination_fs.isfile(image_filepath))

     fs_services.copy_images(
         feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID,
         feconf.ENTITY_TYPE_QUESTION, 'question_id1', ['image.png'])

     for image_filepath in expected_filepaths:
         self.assertTrue(destination_fs.isfile(image_filepath))
Ejemplo n.º 9
0
 def map(exp_model):
     """Reports which image and audio URLs of an exploration are valid.

     Args:
         exp_model: object. The model being mapped over. Only its id is
             read here.

     Yields:
         tuple. For image URLs: (INVALID_GCS_URL, url) when the URL does
         not match GCS_IMAGE_ID_REGEX, (FILE_FOUND_IN_GCS, filename) when
         the file exists, and (ERROR_IN_FILENAME, url) when handling the
         filename raises. For audio URLs: (INVALID_GCS_URL, url) when the
         URL does not match GCS_AUDIO_ID_REGEX. Nothing is yielded when
         constants.DEV_MODE is on.
     """
     if constants.DEV_MODE:
         return

     exp_id = exp_model.id
     fs_old = fs_domain.AbstractFileSystem(fs_domain.GcsFileSystem(exp_id))
     # We have to make sure we pass the dir name without starting or
     # ending with '/'.
     image_urls = fs_old.listdir('image')
     audio_urls = fs_old.listdir('audio')

     for url in image_urls:
         catched_groups = GCS_IMAGE_ID_REGEX.match(url)
         if not catched_groups:
             yield (INVALID_GCS_URL, url.encode('utf-8'))
             continue

         try:
             filename = catched_groups.group(3)
             encoded_filename = filename.encode('utf-8')
             if fs_old.isfile('image/%s' % encoded_filename):
                 yield (FILE_FOUND_IN_GCS, filename.encode('utf-8'))
         except Exception:
             yield (ERROR_IN_FILENAME, url.encode('utf-8'))

     # Unlike image URLs, audio URLs are reported without encoding.
     for url in audio_urls:
         if not GCS_AUDIO_ID_REGEX.match(url):
             yield (INVALID_GCS_URL, url)
Ejemplo n.º 10
0
    def test_thumbnail_size_job_thumbnail_size_is_newly_added(self):
        """Checks the job output for a story whose thumbnail is in storage."""
        self.save_new_story_with_story_contents_schema_v1(
            self.STORY_ID, 'story.svg', '#F8BF74', None, self.albert_id,
            'A title', 'A description', 'A note', self.TOPIC_ID)
        topic_services.add_canonical_story(
            self.albert_id, self.TOPIC_ID, self.STORY_ID)

        # Save the dummy image to the filesystem to be used as thumbnail.
        with python_utils.open_file(
            os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg'),
            'rb', encoding=None) as f:
            raw_image = f.read()
        story_fs_impl = fs_domain.GcsFileSystem(
            feconf.ENTITY_TYPE_STORY, self.STORY_ID)
        fs = fs_domain.AbstractFileSystem(story_fs_impl)
        thumbnail_filepath = (
            '%s/story.svg' % (constants.ASSET_TYPE_THUMBNAIL))
        fs.commit(thumbnail_filepath, raw_image, mimetype='image/svg+xml')

        # Start migration job.
        job_class = story_jobs_one_off.PopulateStoryThumbnailSizeOneOffJob
        job_id = job_class.create_new()
        job_class.enqueue(job_id)
        self.process_and_flush_pending_mapreduce_tasks()

        # Each output entry is the string form of a Python literal.
        actual_output = [
            ast.literal_eval(x) for x in job_class.get_output(job_id)]
        expected = [[u'thumbnail_size_newly_added', 1]]
        self.assertEqual(expected, actual_output)
Ejemplo n.º 11
0
 def map(file_model):
     """Copies the content of one image file model into GCS, if missing.

     Ids that do not end in one of ALLOWED_IMAGE_EXTENSIONS are ignored.

     Args:
         file_model: object. The model being mapped over. Its id and
             content are read here.

     Yields:
         tuple. One of (WRONG_INSTANCE_ID, instance_id),
         (FILE_ALREADY_EXISTS, file model id) or (FILE_COPIED, 1). Nothing
         is yielded when feconf.DEV_MODE is on.
     """
     # This job is allowed to run only in Production environment since it
     # uses GcsFileSystem which can't be used in Development environment.
     if feconf.DEV_MODE:
         return

     instance_id = file_model.id
     filetype = instance_id[instance_id.rfind('.') + 1:]
     # To separate the image entries from the audio entries we get from
     # the FileSnapshotContentModel.
     if filetype not in ALLOWED_IMAGE_EXTENSIONS:
         return

     catched_groups = FILE_MODEL_ID_REGEX.match(instance_id)
     if not catched_groups:
         yield (WRONG_INSTANCE_ID, instance_id)
         return

     filename = catched_groups.group(2)
     exploration_id = catched_groups.group(1)
     content = file_model.content
     fs = fs_domain.AbstractFileSystem(
         fs_domain.GcsFileSystem(exploration_id))
     image_filepath = 'image/%s' % filename

     if fs.isfile(image_filepath):
         yield (FILE_ALREADY_EXISTS, file_model.id)
         return

     fs.commit(
         'ADMIN', image_filepath, content, mimetype='image/%s' % filetype)
     yield (FILE_COPIED, 1)
Ejemplo n.º 12
0
    def test_listdir(self):
        """Checks listdir() on empty, populated, invalid and unknown dirs."""
        self.assertItemsEqual(self.fs.listdir(''), [])

        committed_files = [
            ('abc.png', 'file_contents'),
            ('abcd.png', 'file_contents_2'),
            ('abc/abcd.png', 'file_contents_3'),
            ('bcd/bcde.png', 'file_contents_4'),
        ]
        for committed_path, committed_content in committed_files:
            self.fs.commit(committed_path, committed_content)

        # Listing the root returns every committed path, nested ones
        # included.
        file_names = ['abc.png', 'abc/abcd.png', 'abcd.png', 'bcd/bcde.png']
        self.assertItemsEqual(self.fs.listdir(''), file_names)

        self.assertEqual(self.fs.listdir('abc'), ['abc/abcd.png'])

        with self.assertRaisesRegexp(IOError, 'Invalid filepath'):
            self.fs.listdir('/abc')

        trailing_slash_error = (
            'The dir_name should not start with / or end with / : abc/')
        with self.assertRaisesRegexp(IOError, trailing_slash_error):
            self.fs.listdir('abc/')

        self.assertEqual(self.fs.listdir('fake_dir'), [])

        # A file system for another entity sees none of these files.
        other_impl = fs_domain.GcsFileSystem(
            feconf.ENTITY_TYPE_EXPLORATION, 'eid2')
        new_fs = fs_domain.AbstractFileSystem(other_impl)
        self.assertEqual(new_fs.listdir('assets'), [])
Ejemplo n.º 13
0
    def test_save_original_and_compressed_versions_of_svg_image(self):
        """Checks that all three stored versions of an SVG equal the input."""
        svg_path = os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg')
        with utils.open_file(svg_path, 'rb', encoding=None) as f:
            image_content = f.read()

        with self.swap(constants, 'DEV_MODE', False):
            exploration_impl = fs_domain.GcsFileSystem(
                feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID)
            fs = fs_domain.AbstractFileSystem(exploration_impl)
            image_filepaths = [
                'image/%s' % image_filename for image_filename in (
                    self.FILENAME,
                    self.COMPRESSED_IMAGE_FILENAME,
                    self.MICRO_IMAGE_FILENAME)]

            for image_filepath in image_filepaths:
                self.assertFalse(fs.isfile(image_filepath))

            fs_services.save_original_and_compressed_versions_of_image(
                self.FILENAME, 'exploration', self.EXPLORATION_ID,
                image_content, 'image', False)

            for image_filepath in image_filepaths:
                self.assertTrue(fs.isfile(image_filepath))

            # Read every version back before comparing any of them.
            stored_contents = [
                fs.get(image_filepath) for image_filepath in image_filepaths]
            for stored_content in stored_contents:
                self.assertEqual(stored_content, image_content)
Ejemplo n.º 14
0
    def test_job_with_thumbnail_in_filesystem_logs_success(self):
        """Checks the job output for a topic whose thumbnail is in storage."""
        self.save_new_topic(
            self.TOPIC_ID, self.albert_id, name='A name',
            abbreviated_name='abbrev', description='description',
            thumbnail_size_in_bytes=None)

        # Save the dummy image to the filesystem to be used as thumbnail.
        svg_path = os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg')
        with python_utils.open_file(svg_path, 'rb', encoding=None) as f:
            raw_image = f.read()
        topic_fs_impl = fs_domain.GcsFileSystem(
            feconf.ENTITY_TYPE_TOPIC, self.TOPIC_ID)
        fs = fs_domain.AbstractFileSystem(topic_fs_impl)
        thumbnail_filepath = (
            '%s/topic.svg' % (constants.ASSET_TYPE_THUMBNAIL))
        fs.commit(thumbnail_filepath, raw_image, mimetype='image/svg+xml')

        # Start migration job on sample topic.
        job_class = topic_jobs_one_off.PopulateTopicThumbnailSizeOneOffJob
        job_id = job_class.create_new()
        job_class.enqueue(job_id)

        # Run the queued job to completion before reading its output.
        self.process_and_flush_pending_mapreduce_tasks()

        # Each output entry is the string form of a Python literal.
        actual_output = [
            ast.literal_eval(x) for x in job_class.get_output(job_id)]
        expected = [[u'thumbnail_size_newly_added', 1]]

        self.assertEqual(expected, actual_output)
        topic_services.delete_topic(self.albert_id, self.TOPIC_ID)
Ejemplo n.º 15
0
 def setUp(self):
     """Signs up a user and opens the file system of exploration 'eid'."""
     super(GcsFileSystemUnitTests, self).setUp()
     self.USER_EMAIL = '*****@*****.**'
     self.signup(self.USER_EMAIL, 'username')
     self.user_id = self.get_user_id_from_email(self.USER_EMAIL)

     exploration_impl = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_EXPLORATION, 'eid')
     self.fs = fs_domain.AbstractFileSystem(exploration_impl)
Ejemplo n.º 16
0
 def test_copy(self):
     """Checks that copy() brings a file over from another file system."""
     copied_filename = 'abc2.png'
     self.fs.commit(copied_filename, 'file_contents')
     self.assertEqual(self.fs.listdir(''), ['abc2.png'])

     destination_impl = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_QUESTION, 'question_id1')
     destination_fs = fs_domain.AbstractFileSystem(destination_impl)
     self.assertEqual(destination_fs.listdir(''), [])

     # The source is identified by the assets path of its implementation.
     source_assets_path = self.fs.impl.assets_path
     destination_fs.copy(source_assets_path, copied_filename)
     self.assertTrue(destination_fs.isfile('abc2.png'))
Ejemplo n.º 17
0
 def setUp(self):
     """Opens the file system of 'exp_id' and builds sample classifier data."""
     super(FileSystemClassifierDataTests, self).setUp()

     exploration_impl = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_EXPLORATION, 'exp_id')
     self.fs = fs_domain.AbstractFileSystem(exploration_impl)

     # Sample data with an int, a list of floats and a nested dict.
     nested_submodel = {'param1': 12}
     self.classifier_data = {
         'param1': 40,
         'param2': [34.2, 54.13, 95.23],
         'submodel': nested_submodel,
     }
Ejemplo n.º 18
0
    def get(self, page_context, page_identifier, asset_type, encoded_filename):
        """Returns an asset file.

        Args:
            page_context: str. The context of the page where the asset is
                required.
            page_identifier: str. The unique identifier for the particular
                context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
            asset_type: str. Type of the asset, either image or audio.
            encoded_filename: str. The asset filename. This
                string is encoded in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException. The handler is called while
                constants.DEV_MODE is off, or serving the asset failed for
                any reason. An unsupported page_context is reported this
                way too, because the InvalidInputException raised for it is
                converted below.
        """
        if not constants.DEV_MODE:
            raise self.PageNotFoundException

        try:
            filename = python_utils.urllib_unquote(encoded_filename)
            # Everything after the last '.' is used as the file format.
            file_format = filename[(filename.rfind('.') + 1):]

            # The registered media type for SVG is 'image/svg+xml'. Building
            # it from the extension would give '<asset_type>/svg'.
            if file_format == 'svg':
                content_type = 'image/svg+xml'
            else:
                content_type = '%s/%s' % (asset_type, file_format)

            # If the following is not cast to str, an error occurs in the wsgi
            # library because unicode gets used.
            self.response.headers[
                b'Content-Type'] = python_utils.convert_to_bytes(content_type)

            if page_context not in self._SUPPORTED_PAGE_CONTEXTS:
                raise self.InvalidInputException

            if page_context == feconf.ENTITY_TYPE_SUBTOPIC:
                # Subtopic assets are looked up under the topic, which is
                # identified by name.
                entity_type = feconf.ENTITY_TYPE_TOPIC
                topic = topic_fetchers.get_topic_by_name(page_identifier)
                entity_id = topic.id
            else:
                entity_type = page_context
                entity_id = page_identifier

            fs = fs_domain.AbstractFileSystem(
                fs_domain.GcsFileSystem(entity_type, entity_id))
            raw = fs.get('%s/%s' % (asset_type, filename))

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.write(raw)
        except Exception:
            # A bare 'except:' would also swallow SystemExit and
            # KeyboardInterrupt. Only ordinary errors become a 404.
            raise self.PageNotFoundException
Ejemplo n.º 19
0
 def setUp(self):
     """Opens the file system of 'exp_id' and builds a sample model proto."""
     super(FileSystemClassifierDataTests, self).setUp()

     exploration_impl = fs_domain.GcsFileSystem(
         feconf.ENTITY_TYPE_EXPLORATION, 'exp_id')
     self.fs = fs_domain.AbstractFileSystem(exploration_impl)

     # The proto stores the sample model as a JSON string.
     sample_model = {
         'param1': 40,
         'param2': [34.2, 54.13, 95.23],
         'submodel': {
             'param1': 12
         }
     }
     self.classifier_data_proto = (
         text_classifier_pb2.TextClassifierFrozenModel())
     self.classifier_data_proto.model_json = json.dumps(sample_model)
Ejemplo n.º 20
0
    def test_compress_image_on_prod_mode_with_big_image_size(self):
        """Checks the stored image dimensions when the resize limit is 20."""
        prod_mode_swap = self.swap(constants, 'DEV_MODE', False)
        # This swap is done to make the image's dimensions greater than
        # MAX_RESIZE_DIMENSION_PX so that it can be treated as a big image.
        max_resize_dimension_px_swap = self.swap(
            gae_image_services, 'MAX_RESIZE_DIMENSION_PX', 20)
        with python_utils.open_file(
            os.path.join(feconf.TESTS_DATA_DIR, 'img.png'),
            mode='rb', encoding=None) as f:
            original_image_content = f.read()

        # The scaling factor changes if the dimensions of the image is
        # greater than MAX_RESIZE_DIMENSION_PX.
        with prod_mode_swap, max_resize_dimension_px_swap:
            exploration_impl = fs_domain.GcsFileSystem(
                feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID)
            fs = fs_domain.AbstractFileSystem(exploration_impl)
            image_filepaths = [
                'image/%s' % image_filename for image_filename in (
                    self.FILENAME,
                    self.COMPRESSED_IMAGE_FILENAME,
                    self.MICRO_IMAGE_FILENAME)]

            for image_filepath in image_filepaths:
                self.assertFalse(fs.isfile(image_filepath))

            fs_services.save_original_and_compressed_versions_of_image(
                self.FILENAME, 'exploration', self.EXPLORATION_ID,
                original_image_content, 'image')

            for image_filepath in image_filepaths:
                self.assertTrue(fs.isfile(image_filepath))

            # Read every version back before checking any dimensions.
            stored_contents = [
                fs.get(image_filepath) for image_filepath in image_filepaths]
            # Expected dimensions, in order: original, compressed, micro.
            expected_dimensions = [(32, 32), (20, 20), (20, 20)]

            for stored_content, dimensions in zip(
                    stored_contents, expected_dimensions):
                self.assertEqual(
                    gae_image_services.get_image_dimensions(stored_content),
                    dimensions)
Ejemplo n.º 21
0
    def map(item):
        """Reports math SVG images that lack an xmlns attribute.

        Only files under 'image' whose name matches
        constants.MATH_SVG_FILENAME_REGEX are inspected.

        Args:
            item: object. The model being mapped over. Its deleted flag and
                id are read here.

        Yields:
            tuple. (item id, filename) for every inspected SVG for which
            does_svg_tag_contains_xmlns_attribute returns a falsy value.
        """
        if item.deleted:
            return

        exploration_impl = fs_domain.GcsFileSystem(
            feconf.ENTITY_TYPE_EXPLORATION, item.id)
        fs = fs_domain.AbstractFileSystem(exploration_impl)

        for filepath in fs.listdir('image'):
            # The filename is the last '/'-separated component of the path.
            filename = filepath.rsplit('/', 1)[-1]
            if re.match(constants.MATH_SVG_FILENAME_REGEX, filename):
                svg_content = fs.get(filepath)
                has_xmlns_attribute = (
                    html_validation_service.
                    does_svg_tag_contains_xmlns_attribute(svg_content))
                if not has_xmlns_attribute:
                    yield (item.id, filename)
Ejemplo n.º 22
0
    def get(self, page_context, page_identifier, asset_type, encoded_filename):
        """Serves an asset file when running in emulator mode.

        Args:
            page_context: str. The context of the page where the asset is
                required.
            page_identifier: str. The unique identifier for the particular
                context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
            asset_type: str. Type of the asset, either image or audio.
            encoded_filename: str. The asset filename. This
                string is encoded in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException. The handler is used outside emulator mode,
                or any error (including an unsupported page context) occurs
                while the asset is being loaded.
        """
        if not constants.EMULATOR_MODE:
            raise self.PageNotFoundException

        try:
            filename = python_utils.urllib_unquote(encoded_filename)
            # Everything after the last dot (or the whole name if there is
            # no dot) is treated as the file format.
            file_format = filename.rsplit('.', 1)[-1]

            if file_format == 'svg':
                content_type = 'image/svg+xml'
            else:
                content_type = '%s/%s' % (asset_type, file_format)
            self.response.headers['Content-Type'] = content_type

            if page_context not in self._SUPPORTED_PAGE_CONTEXTS:
                # This is caught by the handler below and reported as a
                # missing page.
                raise self.InvalidInputException

            gcs_file_system = fs_domain.GcsFileSystem(
                page_context, page_identifier)
            asset_contents = fs_domain.AbstractFileSystem(
                gcs_file_system).get('%s/%s' % (asset_type, filename))

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.body_file = io.BytesIO(asset_contents)
        except Exception as e:
            logging.exception(
                'File not found: %s. %s' % (encoded_filename, e))
            raise self.PageNotFoundException
Ejemplo n.º 23
0
 def map(exp_model):
     """Reports image and audio URLs that do not match the GCS id regexes.

     Nothing is checked when feconf.DEV_MODE is set.

     Args:
         exp_model: object. The model being mapped. Only its 'id' attribute
             is read here.

     Yields:
         tuple(str, str). The INVALID_GCS_URL key and the offending URL.
     """
     if feconf.DEV_MODE:
         return

     exp_id = exp_model.id
     fs_old = fs_domain.AbstractFileSystem(
         fs_domain.GcsFileSystem(exp_id))
     # We have to make sure we pass the dir name without starting or
     # ending with '/'.
     image_urls = fs_old.listdir('image')
     audio_urls = fs_old.listdir('audio')

     # Image URLs are checked first, then audio URLs.
     url_checks = (
         (GCS_IMAGE_ID_REGEX, image_urls),
         (GCS_AUDIO_ID_REGEX, audio_urls),
     )
     for url_regex, urls in url_checks:
         for url in urls:
             if not url_regex.match(url):
                 yield (INVALID_GCS_URL, url)
Ejemplo n.º 24
0
    def test_compress_image_on_prod_mode_with_small_image_size(self):
        """Checks the three saved image versions and their dimensions when
        DEV_MODE is swapped to False.
        """
        image_path = os.path.join(feconf.TESTS_DATA_DIR, 'img.png')
        with python_utils.open_file(image_path, mode='rb', encoding=None) as f:
            original_image_content = f.read()

        # Original, compressed and micro versions, in that order.
        saved_filenames = (
            self.FILENAME, self.COMPRESSED_IMAGE_FILENAME,
            self.MICRO_IMAGE_FILENAME)
        expected_dimensions = ((32, 32), (25, 25), (22, 22))

        with self.swap(constants, 'DEV_MODE', False):
            fs = fs_domain.AbstractFileSystem(
                fs_domain.GcsFileSystem(
                    feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID))

            # None of the versions exist before saving.
            for filename in saved_filenames:
                self.assertFalse(fs.isfile('image/%s' % filename))

            fs_services.save_original_and_compressed_versions_of_image(
                self.FILENAME, 'exploration', self.EXPLORATION_ID,
                original_image_content, 'image', True)

            for filename in saved_filenames:
                self.assertTrue(fs.isfile('image/%s' % filename))

            # Fetch all stored versions before asserting on any of them.
            stored_contents = [
                fs.get('image/%s' % filename) for filename in saved_filenames]

            for content, dimensions in zip(
                    stored_contents, expected_dimensions):
                self.assertEqual(
                    image_services.get_image_dimensions(content), dimensions)
Ejemplo n.º 25
0
    def test_listdir(self):
        """Checks listdir for the root, a subdirectory, invalid directory
        names, a missing directory and a different file system.
        """
        with self.swap(constants, 'DEV_MODE', False):
            files_to_commit = [
                ('abc.png', 'file_contents'),
                ('abcd.png', 'file_contents_2'),
                ('abc/abcd.png', 'file_contents_3'),
                ('bcd/bcde.png', 'file_contents_4'),
            ]
            for filepath, contents in files_to_commit:
                self.fs.commit(self.user_id, filepath, contents)

            gcs_file_dir = '/%s/%s/assets/' % (
                app_identity_services.get_gcs_resource_bucket_name(),
                'exploration/eid')

            # The expected listing is in this order, not commit order.
            expected_listing = [
                os.path.join(gcs_file_dir, file_name)
                for file_name in (
                    'abc.png', 'abc/abcd.png', 'abcd.png', 'bcd/bcde.png')
            ]
            self.assertEqual(self.fs.listdir(''), expected_listing)

            self.assertEqual(
                self.fs.listdir('abc'),
                [os.path.join(gcs_file_dir, 'abc/abcd.png')])

            with self.assertRaisesRegexp(IOError, 'Invalid filepath'):
                self.fs.listdir('/abc')

            trailing_slash_error = (
                'The dir_name should not start with /'
                ' or end with / : abc/')
            with self.assertRaisesRegexp(IOError, trailing_slash_error):
                self.fs.listdir('abc/')

            self.assertEqual(self.fs.listdir('fake_dir'), [])

            # A file system for another entity does not see these files.
            other_fs = fs_domain.AbstractFileSystem(
                fs_domain.GcsFileSystem('exploration/eid2'))
            self.assertEqual(other_fs.listdir('assets'), [])
Ejemplo n.º 26
0
 def test_save_original_and_compressed_versions_of_image(self):
     """Checks that saving an image creates the original, compressed and
     micro files, none of which exist beforehand.
     """
     image_path = os.path.join(feconf.TESTS_DATA_DIR, 'img.png')
     with python_utils.open_file(image_path, mode='rb', encoding=None) as f:
         original_image_content = f.read()

     fs = fs_domain.AbstractFileSystem(
         fs_domain.GcsFileSystem(
             feconf.ENTITY_TYPE_EXPLORATION, self.EXPLORATION_ID))
     saved_filenames = (
         self.FILENAME, self.COMPRESSED_IMAGE_FILENAME,
         self.MICRO_IMAGE_FILENAME)

     for filename in saved_filenames:
         self.assertEqual(fs.isfile('image/%s' % filename), False)

     fs_services.save_original_and_compressed_versions_of_image(
         self.FILENAME, 'exploration', self.EXPLORATION_ID,
         original_image_content, 'image', True)

     for filename in saved_filenames:
         self.assertEqual(fs.isfile('image/%s' % filename), True)
Ejemplo n.º 27
0
    def test_invalid_filepaths_are_caught(self):
        """Checks that every file system operation rejects invalid paths
        with an 'Invalid filepath' IOError.
        """
        fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(feconf.ENTITY_TYPE_EXPLORATION, 'eid'))

        invalid_filepaths = [
            '..', '../another_exploration', '../', '/..', '/abc'
        ]

        # Each operation takes just the filepath; commit additionally needs
        # some file content, so it is wrapped.
        operations = (
            fs.isfile,
            fs.open,
            fs.get,
            lambda path: fs.commit(path, 'raw_file'),
            fs.delete,
            fs.listdir,
        )

        for filepath in invalid_filepaths:
            for operation in operations:
                with self.assertRaisesRegexp(IOError, 'Invalid filepath'):
                    operation(filepath)
Ejemplo n.º 28
0
    def map(item):
        """Rewrites the math SVG images of an exploration so that they carry
        an xmlns attribute, and reports what was changed.

        Args:
            item: object. The model being mapped. Only its 'deleted' and 'id'
                attributes are read here.

        Yields:
            tuple. One of:
                - ('FAILED validation', message) for each rewritten SVG that
                  fails image validation (that file is not committed).
                - ('UNCHANGED', count) if any SVGs needed no change.
                - ('SUCCESS - CHANGED Exp Id: <id>', filenames) if any SVGs
                  were committed.
        """
        if item.deleted:
            return

        fs = fs_domain.AbstractFileSystem(fs_domain.GcsFileSystem(
            feconf.ENTITY_TYPE_EXPLORATION, item.id))
        unchanged_svg_count = 0
        modified_svg_filenames = []

        for filepath in fs.listdir('image'):
            filename = filepath.rsplit('/', 1)[-1]
            if not re.match(constants.MATH_SVG_FILENAME_REGEX, filename):
                continue

            original_svg = fs.get(filepath)
            updated_svg = html_validation_service.get_svg_with_xmlns_attribute(
                original_svg)
            if updated_svg == original_svg:
                unchanged_svg_count += 1
                continue

            try:
                image_validation_services.validate_image_and_filename(
                    updated_svg, filename)
            except Exception as e:
                yield (
                    'FAILED validation',
                    'Exploration with id %s failed image validation for the '
                    'filename %s with following error: %s' % (
                        item.id, filename, e))
                continue

            # Only SVGs that passed validation are written back.
            fs.commit(
                filepath.encode('utf-8'), updated_svg,
                mimetype='image/svg+xml')
            modified_svg_filenames.append(filename)

        if unchanged_svg_count:
            yield ('UNCHANGED', unchanged_svg_count)
        if modified_svg_filenames:
            yield (
                'SUCCESS - CHANGED Exp Id: %s' % item.id,
                modified_svg_filenames)
Ejemplo n.º 29
0
    def post(self):
        """Generates structures for Android end-to-end tests.

        This handler generates structures for Android end-to-end tests in
        order to evaluate the integration of network requests from the
        Android client to the backend. This handler should only be called
        once (or otherwise raises an exception), and can only be used in
        development mode (this handler is unavailable in production).

        Note that the handler outputs an empty JSON dict when the request is
        successful.

        The specific structures that are generated:
            Topic: A topic with both a test story and a subtopic.
            Story: A story with 'android_interactions' as an exploration
                node.
            Exploration: 'android_interactions' from the local assets.
            Subtopic: A dummy subtopic to validate the topic.
            Skill: A dummy skill to validate the subtopic.

        Raises:
            Exception. When used in production mode.
            InvalidInputException. The topic is already
                created but not published.
            InvalidInputException. The topic is already published.
        """

        if not constants.DEV_MODE:
            raise Exception('Cannot load new structures data in production.')
        # Refuse to run again if the 'Android test' topic already exists,
        # whether or not it has been published.
        if topic_services.does_topic_with_name_exist(
                'Android test'):
            topic = topic_fetchers.get_topic_by_name('Android test')
            topic_rights = topic_fetchers.get_topic_rights(
                topic.id, strict=False)
            if topic_rights.topic_is_published:
                raise self.InvalidInputException(
                    'The topic is already published.')
            else:
                raise self.InvalidInputException(
                    'The topic exists but is not published.')
        # NOTE(review): '26' is presumably the id of the
        # 'android_interactions' demo exploration -- confirm against the
        # demo exploration list.
        exp_id = '26'
        user_id = feconf.SYSTEM_COMMITTER_ID
        # Generate new Structure id for topic, story, skill and question.
        topic_id = topic_fetchers.get_new_topic_id()
        story_id = story_services.get_new_story_id()
        skill_id = skill_services.get_new_skill_id()
        question_id = question_services.get_new_question_id()

        # Create dummy skill and question.
        skill = self._create_dummy_skill(
            skill_id, 'Dummy Skill for Android', '<p>Dummy Explanation 1</p>')
        question = self._create_dummy_question(
            question_id, 'Question 1', [skill_id])
        question_services.add_question(user_id, question)
        # NOTE(review): 0.3 is presumably the skill difficulty of the
        # question-skill link -- confirm against
        # create_new_question_skill_link.
        question_services.create_new_question_skill_link(
            user_id, question_id, skill_id, 0.3)

        # Create and update topic to validate before publishing.
        topic = topic_domain.Topic.create_default_topic(
            topic_id, 'Android test', 'test-topic-one', 'description')
        topic.update_url_fragment('test-topic')
        topic.update_meta_tag_content('tag')
        topic.update_page_title_fragment_for_web('page title for topic')
        # Save the dummy image to the filesystem to be used as thumbnail.
        with python_utils.open_file(
            os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg'),
            'rb', encoding=None) as f:
            raw_image = f.read()
        fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(
                feconf.ENTITY_TYPE_TOPIC, topic_id))
        fs.commit(
            '%s/test_svg.svg' % (constants.ASSET_TYPE_THUMBNAIL), raw_image,
            mimetype='image/svg+xml')
        # Update thumbnail properties.
        topic.update_thumbnail_filename('test_svg.svg')
        topic.update_thumbnail_bg_color('#C6DCDA')

        # Add other structures to the topic.
        topic.add_canonical_story(story_id)
        topic.add_uncategorized_skill_id(skill_id)
        topic.add_subtopic(1, 'Test Subtopic Title')

        # Update and validate subtopic.
        topic.update_subtopic_thumbnail_filename(1, 'test_svg.svg')
        topic.update_subtopic_thumbnail_bg_color(1, '#FFFFFF')
        topic.update_subtopic_url_fragment(1, 'suburl')
        topic.move_skill_id_to_subtopic(None, 1, skill_id)
        subtopic_page = (
            subtopic_page_domain.SubtopicPage.create_default_subtopic_page(
                1, topic_id))

        # Upload local exploration to the datastore and enable feedback.
        exp_services.load_demo(exp_id)
        rights_manager.release_ownership_of_exploration(
            user_services.get_system_user(), exp_id)
        exp_services.update_exploration(
            user_id, exp_id, [exp_domain.ExplorationChange({
                'cmd': exp_domain.CMD_EDIT_EXPLORATION_PROPERTY,
                'property_name': 'correctness_feedback_enabled',
                'new_value': True
            })], 'Changed correctness_feedback_enabled.')

        # Add and update the exploration/node to the story.
        story = story_domain.Story.create_default_story(
            story_id, 'Android End to End testing', 'Description',
            topic_id, 'android-end-to-end-testing')

        # The story has a single node; its id is the node id prefix
        # followed by 1, and is rebuilt for every node update below.
        story.add_node(
            '%s%d' % (story_domain.NODE_ID_PREFIX, 1),
            'Testing with UI Automator'
        )

        story.update_node_description(
            '%s%d' % (story_domain.NODE_ID_PREFIX, 1),
            'To test all Android interactions'
        )
        story.update_node_exploration_id(
            '%s%d' % (story_domain.NODE_ID_PREFIX, 1),
            exp_id
        )

        # Save the dummy image to the filesystem to be used as thumbnail.
        # The same test SVG is read again and committed to the story's
        # file system this time.
        with python_utils.open_file(
            os.path.join(feconf.TESTS_DATA_DIR, 'test_svg.svg'),
            'rb', encoding=None) as f:
            raw_image = f.read()
        fs = fs_domain.AbstractFileSystem(
            fs_domain.GcsFileSystem(
                feconf.ENTITY_TYPE_STORY, story_id))
        fs.commit(
            '%s/test_svg.svg' % (constants.ASSET_TYPE_THUMBNAIL), raw_image,
            mimetype='image/svg+xml')

        story.update_node_thumbnail_filename(
            '%s%d' % (story_domain.NODE_ID_PREFIX, 1),
            'test_svg.svg')
        story.update_node_thumbnail_bg_color(
            '%s%d' % (story_domain.NODE_ID_PREFIX, 1), '#F8BF74')

        # Update and validate the story.
        story.update_meta_tag_content('tag')
        story.update_thumbnail_filename('test_svg.svg')
        story.update_thumbnail_bg_color(
            constants.ALLOWED_THUMBNAIL_BG_COLORS['story'][0])

        # Save the previously created structures
        # (skill, story, topic, subtopic).
        skill_services.save_new_skill(user_id, skill)
        story_services.save_new_story(user_id, story)
        topic_services.save_new_topic(user_id, topic)
        # NOTE(review): the title recorded in this change
        # ('Dummy Subtopic Title') differs from the one passed to
        # add_subtopic above ('Test Subtopic Title') -- confirm this is
        # intended.
        subtopic_page_services.save_subtopic_page(
            user_id, subtopic_page, 'Added subtopic',
            [topic_domain.TopicChange({
                'cmd': topic_domain.CMD_ADD_SUBTOPIC,
                'subtopic_id': 1,
                'title': 'Dummy Subtopic Title'
            })]
        )

        # Generates translation opportunities for the Contributor Dashboard.
        exp_ids_in_story = story.story_contents.get_all_linked_exp_ids()
        opportunity_services.add_new_exploration_opportunities(
            story_id, exp_ids_in_story)

        # Publish the story and topic.
        topic_services.publish_story(topic_id, story_id, user_id)
        topic_services.publish_topic(topic_id, user_id)

        # Upload thumbnails to be accessible through AssetsDevHandler.
        self._upload_thumbnail(topic_id, feconf.ENTITY_TYPE_TOPIC)
        self._upload_thumbnail(story_id, feconf.ENTITY_TYPE_STORY)
        self.render_json({})
Ejemplo n.º 30
0
    def map(item):
        """Fills in 'duration_secs' for each voiceover of an exploration and
        commits the updated voiceovers, one commit per state.

        Args:
            item: object. The model being mapped. Its 'deleted', 'id',
                'states_schema_version' and 'states' attributes are read.

        Yields:
            tuple(str, str). ('SUCCESS', id) after each state is updated, or
            a message with the id when the state schema version is outside
            the supported range. Nothing is yielded for deleted models or
            for explorations that fail validation.
        """
        if item.deleted:
            return

        exploration = exp_fetchers.get_exploration_by_id(item.id)
        try:
            exploration.validate()
        except Exception as e:
            logging.error(
                'Exploration %s failed non-strict validation: %s' %
                (item.id, e))
            return

        schema_version_is_supported = (
            AUDIO_DURATION_SECS_MIN_STATE_SCHEMA_VERSION <=
            item.states_schema_version <=
            feconf.CURRENT_STATE_SCHEMA_VERSION)
        if not schema_version_is_supported:
            yield ('State schema version is not the '
                   'minimum version expected', item.id)
            return

        # Go through each exploration state to find voiceover recordings.
        for state_name, state_dict in item.states.items():
            voiceovers_mapping = (
                state_dict['recorded_voiceovers']['voiceovers_mapping'])
            for voiceovers_by_language in voiceovers_mapping.values():
                for voiceover in voiceovers_by_language.values():
                    # Get files using the filename.
                    filename = voiceover['filename']
                    try:
                        fs = fs_domain.AbstractFileSystem(
                            fs_domain.GcsFileSystem(
                                AUDIO_ENTITY_TYPE, item.id))
                        raw = fs.get(
                            '%s/%s' % (AUDIO_FILE_PREFIX, filename))
                        # Mutagen reads from a file-like object, so the raw
                        # audio is copied into a rewound in-memory buffer.
                        tempbuffer = python_utils.string_io()
                        tempbuffer.write(raw)
                        tempbuffer.seek(0)
                        audio = mp3.MP3(tempbuffer)
                        tempbuffer.close()
                        # The duration comes from the Mutagen metadata.
                        voiceover['duration_secs'] = audio.info.length
                    except Exception as e:
                        # A failure for one file is logged and the remaining
                        # voiceovers are still processed.
                        logging.error(
                            'Mp3 audio file not found for %s '
                            ', caused by: %s' %
                            (filename, e))

            # Create commits to update the exploration.
            voiceovers_change = exp_domain.ExplorationChange({
                'cmd': exp_domain.CMD_EDIT_STATE_PROPERTY,
                'property_name': (
                    exp_domain.STATE_PROPERTY_RECORDED_VOICEOVERS),
                'state_name': state_name,
                'new_value': {
                    'voiceovers_mapping': voiceovers_mapping
                }
            })
            exp_services.update_exploration(
                feconf.MIGRATION_BOT_USERNAME, item.id, [voiceovers_change],
                'Update duration_secs for each voiceover recording '
                'in the exploration.')
            yield ('SUCCESS', item.id)