class TestAddonIndexer(TestCase):
    fixtures = ['base/users', 'base/addon_3615']

    # The base list of fields we expect to see in the mapping/extraction.
    # This only contains the fields for which we use the value directly,
    # see expected_fields() for the rest.
    # NOTE(review): the list literal below is opened but never closed and has
    # no entries in this revision; its contents need to be restored.
    simple_fields = [

    def setUp(self):
        """Prepare the indexer, the queryset transforms and the test add-on."""
        super(TestAddonIndexer, self).setUp()
        # Applied one after the other to the queryset built by _extract().
        self.transforms = (
            attach_tags,
            attach_translations,
        )
        self.indexer = AddonIndexer()
        # Add-on 3615 is the object every test in this class works on
        # (presumably provided by the 'base/addon_3615' fixture).
        self.addon = Addon.objects.get(pk=3615)

    def expected_fields(cls, include_nullable=True):
        # NOTE(review): this method does not parse as written. The four
        # description lines below have lost their docstring quotes, and the
        # `complex_fields`, `analyzer_fields`, `raw_translated_fields`, length
        # assertion and `fields = (...)` statements are all cut short.
        # NOTE(review): the first parameter is named `cls` but no
        # `@classmethod` decorator is visible; callers in this class use
        # `self.expected_fields()` -- confirm which was intended.
        Returns a list of fields we expect to be present in the mapping and
        in the extraction method.

        Should be updated whenever you change the mapping to add/remove fields.
        # Fields that can not be directly compared with the property of the
        # same name on the Addon instance, either because the property doesn't
        # exist on the model, or it has a different name, or the value we need
        # to store in ES differs from the one in the db.
        complex_fields = [

        # Fields that need to be present in the mapping, but might be skipped
        # for extraction because they can be null.
        nullable_fields = []

        # For each translated field that needs to be indexed, we store one
        # version for each language we have an analyzer for.
        _indexed_translated_fields = ('name', 'description', 'summary')
        analyzer_fields = list(
                '%s_l10n_%s' % (field, lang)
                for lang, analyzer in SEARCH_LANGUAGE_TO_ANALYZER.items()
            ] for field in _indexed_translated_fields]))

        # It'd be annoying to hardcode `analyzer_fields`, so we generate it,
        # but to make sure the test is correct we still do a simple check of
        # the length to make sure we properly flattened the list.
        assert len(analyzer_fields) == (len(SEARCH_LANGUAGE_TO_ANALYZER) *

        # Each translated field that we want to return to the API.
        raw_translated_fields = [
            '%s_translations' % field for field in [
                'name', 'description', 'developer_comments', 'homepage',
                'summary', 'support_email', 'support_url'

        # Return a list with the base fields and the dynamic ones added.
        fields = (cls.simple_fields + complex_fields + analyzer_fields +
        if include_nullable:
            fields += nullable_fields
        return fields

    def test_mapping(self):
        """Check that the mapping exposes exactly the fields we expect.

        Covers the top-level properties, the `index` flag of `default_locale`
        and of the raw name translations, and the keys of the nested
        `current_version` and `current_version.files` mappings.
        """
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Make sure the get_mapping() method does not return fields we did
        # not expect to be present, or omitted fields we want.
        assert set(mapping_properties.keys()) == set(self.expected_fields())

        # Make sure default_locale and translated fields are not indexed.
        assert mapping_properties['default_locale']['index'] is False
        name_translations = mapping_properties['name_translations']
        assert name_translations['properties']['lang']['index'] is False
        assert name_translations['properties']['string']['index'] is False

        # Make sure current_version mapping is set.
        assert mapping_properties['current_version']['properties']
        version_mapping = mapping_properties['current_version']['properties']
        # The tuple was left unterminated; 'version' is the remaining key, as
        # test_extract_version_and_files() checks current_version['version'].
        expected_version_keys = ('id', 'compatible_apps', 'files', 'license',
                                 'release_notes_translations', 'reviewed',
                                 'version')
        assert set(version_mapping.keys()) == set(expected_version_keys)

        # Make sure files mapping is set inside current_version.
        files_mapping = version_mapping['files']['properties']
        expected_file_keys = ('id', 'created', 'filename', 'hash',
                              'is_webextension', 'is_restart_required',
                              'is_mozilla_signed_extension', 'platform',
                              'size', 'status', 'strict_compatibility',
                              'permissions', 'optional_permissions')
        assert set(files_mapping.keys()) == set(expected_file_keys)

    def test_index_setting_boolean(self):
        """Make sure that the `index` setting is a true/false boolean.

        Old versions of ElasticSearch allowed 'no' and 'yes' strings,
        this changed with ElasticSearch 5.x.
        """
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Only properties that define `index` at all are checked.
        assert all(
            isinstance(prop['index'], bool)
            for prop in mapping_properties.values() if 'index' in prop)

        # Make sure our version_mapping is setup correctly too.
        props = mapping_properties['current_version']['properties']

        assert all(
            isinstance(prop['index'], bool) for prop in props.values()
            if 'index' in prop)

        # As well as for current_version.files
        assert all(
            isinstance(prop['index'], bool)
            for prop in props['files']['properties'].values()
            if 'index' in prop)

    def _extract(self):
        """Reload the add-on under test and return its extracted document.

        The add-on is fetched again through `Addon.unfiltered` with every
        transform from `self.transforms` applied, stored back on
        `self.addon`, and passed to the indexer's `extract_document()`.
        Because the instance is reloaded, callers must persist any change
        they make to `self.addon` before calling this helper.
        """
        # Restrict the queryset to the add-on under test. An empty `id__in`
        # list matches no row, which made `list(qs)[0]` raise IndexError.
        qs = Addon.unfiltered.filter(id__in=[self.addon.pk])
        for t in self.transforms:
            qs = qs.transform(t)
        self.addon = list(qs)[0]
        return self.indexer.extract_document(self.addon)

    def test_extract_attributes(self):
        """Check the keys and the non-translated values of the extraction."""
        extracted = self._extract()

        # Like test_mapping() above, but for the extraction process:
        # Make sure the method does not return fields we did not expect to be
        # present, or omitted fields we want.
        # Nullable fields may be skipped by the extraction, so they are left
        # out of the expected set.
        assert set(extracted.keys()) == set(
            self.expected_fields(include_nullable=False))

        # Check base fields values. Other tests below check the dynamic ones.
        for field_name in self.simple_fields:
            assert extracted[field_name] == getattr(self.addon, field_name)

        assert extracted['app'] == []
        assert extracted['boost'] == self.addon.average_daily_users**.2 * 4
        assert extracted['category'] == [1, 22, 71]  # From fixture.
        assert extracted['current_version']
        assert extracted['listed_authors'] == [{
            'name': u'55021 التطب',
            'id': 55021,
            'username': '******',
            'is_public': True
        }]
        assert extracted['platforms'] == []
        assert extracted['ratings'] == {
            'average': self.addon.average_rating,
            'count': self.addon.total_ratings,
            'text_count': self.addon.text_ratings_count,
        }
        assert extracted['tags'] == []
        assert extracted['has_eula'] is True
        assert extracted['has_privacy_policy'] is True
        assert extracted['colors'] is None

    def test_extract_eula_privacy_policy(self):
        """`has_eula` / `has_privacy_policy` are False when both are unset."""
        # Remove eula.
        self.addon.eula_id = None
        # Empty privacy policy should not be considered.
        self.addon.privacy_policy_id = ''
        # _extract() reloads the add-on from the database, so the changes
        # above have to be persisted or they are silently discarded.
        self.addon.save()
        extracted = self._extract()

        assert extracted['has_eula'] is False
        assert extracted['has_privacy_policy'] is False

    def test_extract_no_current_version(self):
        """An add-on without a current version is extracted with `None`."""
        # Without this step the fixture add-on still has a current version
        # (test_extract_attributes() asserts it is truthy), so the assertion
        # below could never hold.
        # NOTE(review): relies on deleting the version also clearing
        # `Addon.current_version` in the database -- confirm.
        self.addon.current_version.delete()
        extracted = self._extract()

        assert extracted['current_version'] is None

    def test_extract_version_and_files(self):
        """Check the extracted `current_version` data and its files."""
        # NOTE(review): this method does not parse as written. The
        # `License.objects.create(` call is cut short and followed by a stray
        # `for ... in version.all_files` fragment, several `==` comparisons
        # have no right-hand side, and the dict/list literals in the
        # assertions are never closed. The setup that would give the files
        # their permissions is not visible either.
        permissions = ['bookmarks', 'random permission']
        optional_permissions = ['cookies', 'optional permission']
        version = self.addon.current_version
        # Make the version a webextension and add a bunch of things to it to
        # test different scenarios.
        del version.all_files
        version.license = License.objects.create(name=u'My licensé',
            for file_ in version.all_files

        # Now we can run the extraction and start testing.
        extracted = self._extract()

        assert extracted['current_version']
        assert extracted['current_version']['id'] ==
        # Because strict_compatibility is False, the max version we record in
        # the index is an arbitrary super high version.
        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100,
                'max': version_int('*'),
                'max_human': '4.0',
                'min_human': '2.0',
        assert extracted['current_version']['license'] == {
            'builtin': 0,
            'name_translations': [{
                'lang': u'en-US',
                'string': u'My licensé'
            'url': u''
        assert extracted['current_version']['release_notes_translations'] == [
                'lang': 'en-US',
                'string': u'Fix for an important bug'
                u"Quelque chose en fran\xe7ais."
                u"\n\nQuelque chose d'autre."
        assert extracted['current_version']['reviewed'] == version.reviewed
        assert extracted['current_version']['version'] == version.version
        for index, file_ in enumerate(version.all_files):
            extracted_file = extracted['current_version']['files'][index]
            assert extracted_file['id'] ==
            assert extracted_file['created'] == file_.created
            assert extracted_file['filename'] == file_.filename
            assert extracted_file['hash'] == file_.hash
            assert extracted_file['is_webextension'] == file_.is_webextension
            assert extracted_file['is_restart_required'] == (
            assert extracted_file['is_mozilla_signed_extension'] == (
            assert extracted_file['platform'] == file_.platform
            assert extracted_file['size'] == file_.size
            assert extracted_file['status'] == file_.status
            assert (extracted_file['permissions'] == permissions)
            assert (
                extracted_file['optional_permissions'] == optional_permissions)

        assert set(extracted['platforms']) == set(

    def test_version_compatibility_with_strict_compatibility_enabled(self):
        """Check the `compatible_apps` value extracted for current_version."""
        # NOTE(review): nothing visible here enables strict compatibility
        # (`version` is fetched but never used), and the expected dict below
        # is never closed -- the setup and closing lines need to be restored.
        version = self.addon.current_version
        extracted = self._extract()

        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100,
                'max': 4000000200100,
                'max_human': '4.0',
                'min_human': '2.0',

    def test_extract_translations(self):
        """Check raw and per-language translated fields in the extraction."""
        # NOTE(review): this method does not parse as written. Both
        # translation dicts are never closed, the
        # `self.addon.summary_id = None = translations_name` line looks like
        # two assignments merged with the second target lost, most expected
        # list/dict literals are missing their braces or entries, and two
        # `==` comparisons have no right-hand side. No statement persisting
        # the changes before `_extract()` is visible either.
        translations_name = {
            'en-US': u'Name in ënglish',
            'es': u'Name in Español',
            'it': None,  # Empty name should be ignored in extract.
        translations_description = {
            'en-US': u'Description in ënglish',
            'es': u'Description in Español',
            'fr': '',  # Empty description should be ignored in extract.
            'it': '<script>alert(42)</script>',
        self.addon.summary_id = None = translations_name
        self.addon.description = translations_description
        extracted = self._extract()
        assert extracted['name_translations'] == [
                'lang': 'en-US',
                'string': translations_name['en-US']
                'lang': 'es',
                'string': translations_name['es']
        assert extracted['description_translations'] == [{
        }, {
        }, {
        assert extracted['name_l10n_en-us'] == translations_name['en-US']
        assert extracted['name_l10n_en-gb'] == ''
        assert extracted['name_l10n_es'] == translations_name['es']
        assert extracted['name_l10n_it'] == ''
        assert (extracted['description_l10n_en-us'] ==
        assert (
            extracted['description_l10n_es'] == translations_description['es'])
        assert extracted['description_l10n_fr'] == ''
        assert (extracted['description_l10n_it'] ==
        assert extracted['summary_l10n_en-us'] == ''
        # The non-l10n fields are fallbacks in the addon's default locale, they
        # need to always contain a string.
        assert extracted['name'] == 'Name in ënglish'
        assert extracted['summary'] == ''

    def test_extract_translations_engb_default(self):
        """Make sure we do correctly extract things for en-GB default locale"""
        with self.activate('en-GB'):
            kwargs = {
                'status': amo.STATUS_APPROVED,
                'type': amo.ADDON_EXTENSION,
                'default_locale': 'en-GB',
                'name': 'Banana Bonkers',
                'description': 'Let your browser eat your bananas',
                'summary': 'Banana Summary',
            }

            self.addon = Addon.objects.create(**kwargs)
            # Add Spanish translations on top of the en-GB defaults.
            self.addon.name = {'es': 'Banana Bonkers espanole'}
            self.addon.description = {
                'es': 'Deje que su navegador coma sus plátanos'
            }
            self.addon.summary = {'es': 'resumen banana'}
            # _extract() reloads the add-on from the database, so the extra
            # translations have to be saved to show up in the document.
            self.addon.save()

        extracted = self._extract()

        assert extracted['name_translations'] == [
            {
                'lang': 'en-GB',
                'string': 'Banana Bonkers'
            },
            {
                'lang': 'es',
                'string': 'Banana Bonkers espanole'
            },
        ]
        assert extracted['description_translations'] == [
            {
                'lang': 'en-GB',
                'string': 'Let your browser eat your bananas'
            },
            {
                'lang': 'es',
                'string': 'Deje que su navegador coma sus plátanos'
            },
        ]
        assert extracted['name_l10n_en-gb'] == 'Banana Bonkers'
        assert extracted['name_l10n_en-us'] == ''
        assert extracted['name_l10n_es'] == 'Banana Bonkers espanole'
        assert (extracted['description_l10n_en-gb'] ==
                'Let your browser eat your bananas')
        assert (extracted['description_l10n_es'] ==
                'Deje que su navegador coma sus plátanos')

    def test_extract_previews(self):
        """Check that add-on previews are extracted with their metadata."""
        # NOTE(review): this method does not parse as written. The first
        # `Preview.objects.create(` call is cut short (its keyword names and
        # closing brackets are missing), two `['id'] ==` comparisons have no
        # right-hand side, and the expected caption/sizes literals are never
        # closed.
        second_preview = Preview.objects.create(addon=self.addon,
                                                    'en-US': u'My câption',
                                                    'fr': u'Mön tîtré'
                                                    'thumbnail': [199, 99],
                                                    'image': [567, 780]
        first_preview = Preview.objects.create(addon=self.addon, position=1)
        extracted = self._extract()
        assert extracted['previews']
        assert len(extracted['previews']) == 2
        assert extracted['previews'][0]['id'] ==
        assert extracted['previews'][0]['modified'] == first_preview.modified
        assert extracted['previews'][0]['caption_translations'] == []
        assert extracted['previews'][0]['sizes'] == first_preview.sizes == {}
        assert extracted['previews'][1]['id'] ==
        assert extracted['previews'][1]['modified'] == second_preview.modified
        assert extracted['previews'][1]['caption_translations'] == [{
            u'My câption'
        }, {
            u'Mön tîtré'
        assert extracted['previews'][1]['sizes'] == second_preview.sizes == {
            'thumbnail': [199, 99],
            'image': [567, 780]

        # Only raw translations dict should exist, since we don't need the
        # to search against preview captions.
        assert 'caption' not in extracted['previews'][0]
        assert 'caption' not in extracted['previews'][1]

    def test_extract_previews_statictheme(self):
        """Check extraction of version previews and of the `colors` field."""
        # NOTE(review): this method does not parse as written. Both
        # `VersionPreview.objects.create(` calls have lost their keyword
        # names and closing brackets, two `['id'] ==` comparisons have no
        # right-hand side, and the expected sizes dicts are never closed.
        # Nothing visible turns the fixture add-on into a static theme.
        current_preview = VersionPreview.objects.create(
                'h': 1,
                's': 2,
                'l': 3,
                'ratio': 0.9
                'thumbnail': [56, 78],
                'image': [91, 234]
        second_preview = VersionPreview.objects.create(
                'thumbnail': [12, 34],
                'image': [56, 78]
        extracted = self._extract()
        assert extracted['previews']
        assert len(extracted['previews']) == 2
        assert 'caption_translations' not in extracted['previews'][0]
        assert extracted['previews'][0]['id'] ==
        assert extracted['previews'][0]['modified'] == current_preview.modified
        assert extracted['previews'][0]['sizes'] == current_preview.sizes == {
            'thumbnail': [56, 78],
            'image': [91, 234]
        assert 'caption_translations' not in extracted['previews'][1]
        assert extracted['previews'][1]['id'] ==
        assert extracted['previews'][1]['modified'] == second_preview.modified
        assert extracted['previews'][1]['sizes'] == second_preview.sizes == {
            'thumbnail': [12, 34],
            'image': [56, 78]

        # Make sure we extract colors from the first preview.
        assert extracted['colors'] == [{'h': 1, 's': 2, 'l': 3, 'ratio': 0.9}]

    def test_extract_staticthemes_somehow_no_previews(self):
        # Extracting a static theme with no previews should not fail.
        # NOTE(review): no statement visible here changes the add-on into a
        # static theme, and the `extracted['id'] ==` comparison below has no
        # right-hand side -- both need to be restored.

        extracted = self._extract()
        assert extracted['id'] ==
        assert extracted['previews'] == []
        assert extracted['colors'] is None

    def test_reindex_tasks_group(self, create_chunked_tasks_signatures_mock):
        """Check what `reindex_tasks_group()` hands to the chunking helper."""
        # NOTE(review): `create_chunked_tasks_signatures_mock` is an extra
        # positional argument, but no patch decorator supplying it is visible
        # above this method -- confirm it was not lost. The `expected_ids`
        # list below is also cut short and does not parse.
        from olympia.addons.tasks import index_addons

        expected_ids = [
                              'channel': amo.RELEASE_CHANNEL_UNLISTED
        rval = AddonIndexer.reindex_tasks_group('addons')
        assert create_chunked_tasks_signatures_mock.call_count == 1
        # The helper receives the task, the ids to index and 150.
        assert create_chunked_tasks_signatures_mock.call_args[0] == (
            index_addons, expected_ids, 150)
        assert rval == create_chunked_tasks_signatures_mock.return_value
class TestAddonIndexer(TestCase):
    fixtures = ['base/users', 'base/addon_3615']

    # The base list of fields we expect to see in the mapping/extraction.
    # This only contains the fields for which we use the value directly,
    # see expected_fields() for the rest.
    simple_fields = [
        'average_daily_users', 'bayesian_rating', 'contributions', 'created',
        'default_locale', 'guid', 'hotness', 'icon_hash', 'icon_type', 'id',
        'is_disabled', 'is_experimental', 'last_updated', 'modified',
        'public_stats', 'requires_payment', 'slug', 'status', 'type',
        'view_source', 'weekly_downloads',
    ]

    def setUp(self):
        """Prepare the indexer, the queryset transforms and the test add-on."""
        super(TestAddonIndexer, self).setUp()
        # Applied one after the other to the queryset built by _extract().
        self.transforms = (
            attach_tags,
            attach_translations,
        )
        self.indexer = AddonIndexer()
        # Add-on 3615 is the object every test in this class works on
        # (presumably provided by the 'base/addon_3615' fixture).
        self.addon = Addon.objects.get(pk=3615)

    @classmethod
    def expected_fields(cls, include_nullable=True):
        """
        Returns a list of fields we expect to be present in the mapping and
        in the extraction method.

        Should be updated whenever you change the mapping to add/remove fields.

        When `include_nullable` is False, the fields that may be skipped by
        the extraction because they can be null are left out.
        """
        # Fields that can not be directly compared with the property of the
        # same name on the Addon instance, either because the property doesn't
        # exist on the model, or it has a different name, or the value we need
        # to store in ES differs from the one in the db.
        complex_fields = [
            'app', 'boost', 'category',
            'current_version', 'description', 'featured_for',
            'has_eula', 'has_privacy_policy',
            'has_theme_rereview', 'is_featured', 'latest_unlisted_version',
            'listed_authors', 'name', 'platforms', 'previews',
            'public_stats', 'ratings', 'summary', 'tags',
        ]

        # Fields that need to be present in the mapping, but might be skipped
        # for extraction because they can be null.
        nullable_fields = ['persona']

        # For each translated field that needs to be indexed, we store one
        # version for each language-specific analyzer we have.
        _indexed_translated_fields = ('name', 'description', 'summary')
        analyzer_fields = list(chain.from_iterable(
            [['%s_l10n_%s' % (field, analyzer) for analyzer
             in SEARCH_ANALYZER_MAP] for field in _indexed_translated_fields]))

        # It'd be annoying to hardcode `analyzer_fields`, so we generate it,
        # but to make sure the test is correct we still do a simple check of
        # the length to make sure we properly flattened the list.
        assert len(analyzer_fields) == (len(SEARCH_ANALYZER_MAP) *
                                        len(_indexed_translated_fields))

        # Each translated field that we want to return to the API.
        raw_translated_fields = [
            '%s_translations' % field for field in
            ['name', 'description', 'developer_comments', 'homepage',
             'summary', 'support_email', 'support_url']]

        # Return a list with the base fields and the dynamic ones added.
        # The concatenation builds a new list, so the `+=` below never
        # mutates the class-level `simple_fields`.
        fields = (cls.simple_fields + complex_fields + analyzer_fields +
                  raw_translated_fields)
        if include_nullable:
            fields += nullable_fields
        return fields

    def test_mapping(self):
        """Check that the mapping exposes exactly the fields we expect.

        Covers the top-level properties, the `index` flag of fields that are
        only stored, and the keys of the nested `current_version` and
        `current_version.files` mappings.
        """
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Make sure the get_mapping() method does not return fields we did
        # not expect to be present, or omitted fields we want.
        assert set(mapping_properties.keys()) == set(self.expected_fields())

        # Make sure default_locale and translated fields are not indexed.
        assert mapping_properties['default_locale']['index'] is False
        name_translations = mapping_properties['name_translations']
        assert name_translations['properties']['lang']['index'] is False
        assert name_translations['properties']['string']['index'] is False

        # Make sure nothing inside 'persona' is indexed, it's only there to be
        # returned back to the API directly.
        for field in mapping_properties['persona']['properties'].values():
            assert field['index'] is False

        # Make sure current_version mapping is set.
        assert mapping_properties['current_version']['properties']
        version_mapping = mapping_properties['current_version']['properties']
        expected_version_keys = (
            'id', 'compatible_apps', 'files', 'reviewed', 'version')
        assert set(version_mapping.keys()) == set(expected_version_keys)

        # Make sure files mapping is set inside current_version.
        files_mapping = version_mapping['files']['properties']
        # The tuple was left unterminated; 'webext_permissions_list' is the
        # remaining file key, as test_extract_version_and_files() reads it
        # from every extracted file.
        expected_file_keys = (
            'id', 'created', 'filename', 'hash', 'is_webextension',
            'is_restart_required', 'is_mozilla_signed_extension', 'platform',
            'size', 'status', 'strict_compatibility',
            'webext_permissions_list')
        assert set(files_mapping.keys()) == set(expected_file_keys)

    def test_index_setting_boolean(self):
        """Make sure that the `index` setting is a true/false boolean.

        Old versions of ElasticSearch allowed 'no' and 'yes' strings,
        this changed with ElasticSearch 5.x.
        """
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Only properties that define `index` at all are checked.
        assert all(
            isinstance(prop['index'], bool)
            for prop in mapping_properties.values()
            if 'index' in prop)

        # Make sure our version_mapping is setup correctly too.
        props = mapping_properties['current_version']['properties']

        assert all(
            isinstance(prop['index'], bool)
            for prop in props.values() if 'index' in prop)

        # As well as for current_version.files
        assert all(
            isinstance(prop['index'], bool)
            for prop in props['files']['properties'].values()
            if 'index' in prop)

    def _extract(self):
        """Reload the add-on under test and return its extracted document.

        The add-on is fetched again through `Addon.unfiltered` with every
        transform from `self.transforms` applied, stored back on
        `self.addon`, and passed to the indexer's `extract_document()`.
        Because the instance is reloaded, callers must persist any change
        they make to `self.addon` before calling this helper.
        """
        # Restrict the queryset to the add-on under test. An empty `id__in`
        # list matches no row, which made `list(qs)[0]` raise IndexError.
        qs = Addon.unfiltered.filter(id__in=[self.addon.pk])
        for t in self.transforms:
            qs = qs.transform(t)
        self.addon = list(qs)[0]
        return self.indexer.extract_document(self.addon)

    def test_extract_attributes(self):
        """Check the keys and the non-translated values of the extraction."""
        extracted = self._extract()

        # Like test_mapping() above, but for the extraction process:
        # Make sure the method does not return fields we did not expect to be
        # present, or omitted fields we want.
        # Nullable fields may be skipped by the extraction, so they are left
        # out of the expected set.
        assert set(extracted.keys()) == set(
            self.expected_fields(include_nullable=False))

        # Check base fields values. Other tests below check the dynamic ones.
        for field_name in self.simple_fields:
            assert extracted[field_name] == getattr(self.addon, field_name)

        assert extracted['app'] == []
        assert extracted['boost'] == self.addon.average_daily_users ** .2 * 4
        assert extracted['category'] == [1, 22, 71]  # From fixture.
        assert extracted['current_version']
        assert extracted['has_theme_rereview'] is None
        assert extracted['latest_unlisted_version'] is None
        assert extracted['listed_authors'] == [
            {'name': u'55021 التطب', 'id': 55021, 'username': '******',
             'is_public': True}]
        assert extracted['platforms'] == []
        assert extracted['ratings'] == {
            'average': self.addon.average_rating,
            'count': self.addon.total_ratings,
            'text_count': self.addon.text_ratings_count,
        }
        assert extracted['tags'] == []
        assert extracted['has_eula'] is True
        assert extracted['has_privacy_policy'] is True
        assert extracted['is_featured'] is False

    def test_extract_is_featured(self):
        """Check that `is_featured` is extracted as True for a featured add-on."""
        # NOTE(review): `collection` is created but never linked to
        # `self.addon` or marked as featured in the visible code, so nothing
        # here makes `is_featured()` true -- the setup lines appear to be
        # missing and need to be restored.
        collection = collection_factory()
        assert self.addon.is_featured()
        extracted = self._extract()
        assert extracted['is_featured'] is True

    def test_extract_featured_for(self):
        """Check the `featured_for` value across several featuring setups."""
        # NOTE(review): this method does not parse as written: the
        # `FeaturedCollection.objects.create(` call is cut short. The
        # statements that would change the featured locales between the
        # successive `_extract()` calls are not visible either, although the
        # expected values differ from one assertion to the next.
        collection = collection_factory()
        featured_collection = FeaturedCollection.objects.create(
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None]}]

        # Even if the locale for the FeaturedCollection is an empty string
        # instead of None, we extract it as None so that it keeps its special
        # meaning.
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None]}]

        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None, 'fr']}]

        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None, 'fr']},
            {'application': [], 'locales': ['de-DE']}]

    def test_extract_eula_privacy_policy(self):
        """`has_eula` / `has_privacy_policy` are False when both are unset."""
        # Remove eula.
        self.addon.eula_id = None
        # Empty privacy policy should not be considered.
        self.addon.privacy_policy_id = ''
        # _extract() reloads the add-on from the database, so the changes
        # above have to be persisted or they are silently discarded.
        self.addon.save()
        extracted = self._extract()

        assert extracted['has_eula'] is False
        assert extracted['has_privacy_policy'] is False

    def test_extract_no_current_version(self):
        """An add-on without a current version is extracted with `None`."""
        # Without this step the fixture add-on still has a current version
        # (test_extract_attributes() asserts it is truthy), so the assertion
        # below could never hold.
        # NOTE(review): relies on deleting the version also clearing
        # `Addon.current_version` in the database -- confirm.
        self.addon.current_version.delete()
        extracted = self._extract()

        assert extracted['current_version'] is None

    def test_extract_version_and_files(self):
        """Check extracted data for the current and latest unlisted versions."""
        # NOTE(review): this method does not parse as written. The
        # `version_factory(` call is cut short, the statement that owned the
        # `permissions=[...]` argument is missing, several `==` comparisons
        # have no right-hand side, the `compatible_apps` dicts are never
        # closed and `set([,` is incomplete.
        version = self.addon.current_version

        unlisted_version = version_factory(
            addon=self.addon, channel=amo.RELEASE_CHANNEL_UNLISTED, file_kw={
                'is_webextension': True,
        # Give one of the versions some webext permissions to test that.
            permissions=['bookmarks', 'random permission']
        extracted = self._extract()

        assert extracted['current_version']
        assert extracted['current_version']['id'] ==
        # Because strict_compatibility is False, the max version we record in
        # the index is an arbitrary super high version.
        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100,
                'max': 9999000000200100,
                'max_human': '4.0',
                'min_human': '2.0',
        assert extracted['current_version']['reviewed'] == version.reviewed
        assert extracted['current_version']['version'] == version.version
        for index, file_ in enumerate(version.all_files):
            extracted_file = extracted['current_version']['files'][index]
            assert extracted_file['id'] ==
            assert extracted_file['created'] == file_.created
            assert extracted_file['filename'] == file_.filename
            assert extracted_file['hash'] == file_.hash
            assert extracted_file['is_webextension'] == file_.is_webextension
            assert extracted_file['is_restart_required'] == (
            assert extracted_file['is_mozilla_signed_extension'] == (
            assert extracted_file['platform'] == file_.platform
            assert extracted_file['size'] == file_.size
            assert extracted_file['status'] == file_.status
            assert extracted_file['webext_permissions_list'] == []

        assert set(extracted['platforms']) == set([,

        # From here on the same checks are repeated for the unlisted version.
        version = unlisted_version
        assert extracted['latest_unlisted_version']
        assert extracted['latest_unlisted_version']['id'] ==
        # Because strict_compatibility is False, the max version we record in
        # the index is an arbitrary super high version.
        assert extracted['latest_unlisted_version']['compatible_apps'] == {
                'min': 4009900200100,
                'max': 9999000000200100,
                'max_human': '5.0.99',
                'min_human': '4.0.99',
        assert (
            extracted['latest_unlisted_version']['version'] == version.version)
        for idx, file_ in enumerate(version.all_files):
            extracted_file = extracted['latest_unlisted_version']['files'][idx]
            assert extracted_file['id'] ==
            assert extracted_file['created'] == file_.created
            assert extracted_file['filename'] == file_.filename
            assert extracted_file['hash'] == file_.hash
            assert extracted_file['is_webextension'] == file_.is_webextension
            assert extracted_file['is_mozilla_signed_extension'] == (
            assert extracted_file['is_restart_required'] == (
            assert extracted_file['platform'] == file_.platform
            assert extracted_file['size'] == file_.size
            assert extracted_file['status'] == file_.status
            assert (extracted_file['webext_permissions_list'] ==
                    file_.webext_permissions_list ==
                    ['bookmarks', 'random permission'])

    def test_version_compatibility_with_strict_compatibility_enabled(self):
        """Check the `compatible_apps` value extracted for current_version."""
        # NOTE(review): nothing visible here enables strict compatibility
        # (`version` is fetched but never used), and the expected dict below
        # is never closed -- the setup and closing lines need to be restored.
        version = self.addon.current_version
        extracted = self._extract()

        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100,
                'max': 4000000200100,
                'max_human': '4.0',
                'min_human': '2.0',

    def test_extract_translations(self):
        """Check raw and per-analyzer translated fields in the extraction."""
        # NOTE(review): this method does not parse as written. The first
        # translation dict is never closed, `} = translations_name` has lost
        # its assignment target, both `sorted([...` calls are unterminated and
        # the three `description_l10n_*` comparisons have no right-hand side.
        # No statement persisting the changes before `_extract()` is visible.
        translations_name = {
            'en-US': u'Name in ënglish',
            'es': u'Name in Español',
            'it': None,  # Empty name should be ignored in extract.
        translations_description = {
            'en-US': u'Description in ënglish',
            'es': u'Description in Español',
            'fr': '',  # Empty description should be ignored in extract.
            'it': '<script>alert(42)</script>',
        } = translations_name
        self.addon.description = translations_description
        extracted = self._extract()
        assert sorted(extracted['name_translations']) == sorted([
            {'lang': u'en-US', 'string': translations_name['en-US']},
            {'lang': u'es', 'string': translations_name['es']},
        assert sorted(extracted['description_translations']) == sorted([
            {'lang': u'en-US', 'string': translations_description['en-US']},
            {'lang': u'es', 'string': translations_description['es']},
            {'lang': u'it', 'string': '&lt;script&gt;alert(42)&lt;/script&gt;'}
        assert extracted['name_l10n_english'] == [translations_name['en-US']]
        assert extracted['name_l10n_spanish'] == [translations_name['es']]
        assert (extracted['description_l10n_english'] ==
        assert (extracted['description_l10n_spanish'] ==
        assert (extracted['description_l10n_italian'] ==

    def test_extract_translations_engb_default(self):
        """Make sure we do correctly extract things for en-GB default locale"""
        with self.activate('en-GB'):
            kwargs = {
                'status': amo.STATUS_PUBLIC,
                'type': amo.ADDON_EXTENSION,
                'default_locale': 'en-GB',
                'name': 'Banana Bonkers',
                'description': u'Let your browser eat your bananas',
                'summary': u'Banana Summary',
            }

            self.addon = Addon.objects.create(**kwargs)
            # Add Spanish translations on top of the en-GB defaults.
            self.addon.name = {'es': u'Banana Bonkers espanole'}
            self.addon.description = {
                'es': u'Deje que su navegador coma sus plátanos'}
            self.addon.summary = {'es': u'resumen banana'}
            # _extract() reloads the add-on from the database, so the extra
            # translations have to be saved to show up in the document.
            self.addon.save()

        extracted = self._extract()

        # Translations come back in no guaranteed order. Sort on the language
        # code, which is unique per list: plain sorted() on dicts raises
        # TypeError on Python 3.
        def by_lang(translation):
            return translation['lang']

        assert sorted(extracted['name_translations'], key=by_lang) == sorted([
            {'lang': u'en-GB', 'string': 'Banana Bonkers'},
            {'lang': u'es', 'string': u'Banana Bonkers espanole'},
        ], key=by_lang)
        assert sorted(
            extracted['description_translations'], key=by_lang) == sorted([
                {
                    'lang': u'en-GB',
                    'string': u'Let your browser eat your bananas'
                },
                {
                    'lang': u'es',
                    'string': u'Deje que su navegador coma sus plátanos'
                },
            ], key=by_lang)
        assert extracted['name_l10n_english'] == ['Banana Bonkers']
        assert extracted['name_l10n_spanish'] == [u'Banana Bonkers espanole']
        assert (extracted['description_l10n_english'] ==
                [u'Let your browser eat your bananas'])
        assert (extracted['description_l10n_spanish'] ==
                [u'Deje que su navegador coma sus plátanos'])

    def test_extract_persona(self):
        # Override self.addon with a persona.
        self.addon = addon_factory(persona_id=42, type=amo.ADDON_PERSONA)
        # It's a Persona, there should not be any files attached, and the
        # indexer should not care.
        assert self.addon.current_version.files.count() == 0

        persona = self.addon.persona
        persona.header = u'myheader.jpg'
        persona.footer = u'myfooter.jpg'
        persona.accentcolor = u'336699'
        persona.textcolor = u'f0f0f0' = u'Me-me-me-Myself'
        persona.display_username = u'my-username'
        persona.popularity = 1000
        extracted = self._extract()
        assert extracted['average_daily_users'] == persona.popularity
        assert extracted['weekly_downloads'] == persona.popularity * 7
        assert extracted['boost'] == float(persona.popularity ** .2) * 4
        assert extracted['has_theme_rereview'] is False
        assert extracted['persona']['accentcolor'] == persona.accentcolor
        # We need the author that will go in theme_data here, which is
        # persona.display_username, not persona.author.
        assert extracted['persona']['author'] == persona.display_username
        assert extracted['persona']['header'] == persona.header
        assert extracted['persona']['footer'] == persona.footer
        assert extracted['persona']['is_new'] is False  # It has a persona_id.
        assert extracted['persona']['textcolor'] == persona.textcolor

        # Personas are always considered compatible with every platform, and
        # almost all versions of all apps.
        assert extracted['platforms'] == []
        assert extracted['current_version']['compatible_apps'] == {
                'max': 9999000000200100,
                'max_human': '9999',
                'min': 11000000200100,
                'min_human': '11.0',
                'max': 9999000000200100,
                'max_human': '9999',
                'min': 4000000200100,
                'min_human': '4.0',
        self.addon = addon_factory(persona_id=0, type=amo.ADDON_PERSONA)
        extracted = self._extract()
        assert extracted['persona']['is_new'] is True  # No persona_id.

    def test_extract_previews(self):
        second_preview = Preview.objects.create(
            addon=self.addon, position=2,
            caption={'en-US': u'My câption', 'fr': u'Mön tîtré'},
            sizes={'thumbnail': [199, 99], 'image': [567, 780]})
        first_preview = Preview.objects.create(addon=self.addon, position=1)
        extracted = self._extract()
        assert extracted['previews']
        assert len(extracted['previews']) == 2
        assert extracted['previews'][0]['id'] ==
        assert extracted['previews'][0]['modified'] == first_preview.modified
        assert extracted['previews'][0]['caption_translations'] == []
        assert extracted['previews'][0]['sizes'] == first_preview.sizes == {}
        assert extracted['previews'][1]['id'] ==
        assert extracted['previews'][1]['modified'] == second_preview.modified
        assert extracted['previews'][1]['caption_translations'] == [
            {'lang': 'en-US', 'string': u'My câption'},
            {'lang': 'fr', 'string': u'Mön tîtré'}]
        assert extracted['previews'][1]['sizes'] == second_preview.sizes == {
            'thumbnail': [199, 99], 'image': [567, 780]}

        # Only the raw translations dict should exist, since we don't need
        # to search against preview captions.
        assert 'caption' not in extracted['previews'][0]
        assert 'caption' not in extracted['previews'][1]

    def test_extract_previews_statictheme(self):
        current_preview = VersionPreview.objects.create(
            sizes={'thumbnail': [56, 78], 'image': [91, 234]})
        extracted = self._extract()
        assert extracted['previews']
        assert len(extracted['previews']) == 1
        assert 'caption_translations' not in extracted['previews'][0]
        assert extracted['previews'][0]['id'] ==
        assert extracted['previews'][0]['modified'] == current_preview.modified
        assert extracted['previews'][0]['sizes'] == current_preview.sizes == {
            'thumbnail': [56, 78], 'image': [91, 234]}
class TestAddonIndexer(TestCase):
    fixtures = ['base/users', 'base/addon_3615']

    # The base list of fields we expect to see in the mapping/extraction.
    # This only contains the fields for which we use the value directly,
    # see expected_fields() for the rest.
    simple_fields = [
        'average_daily_users', 'bayesian_rating', 'created', 'default_locale',
        'guid', 'hotness', 'icon_type', 'id', 'is_disabled', 'is_experimental',
        'is_listed', 'last_updated', 'modified', 'public_stats', 'slug',
        'status', 'type', 'view_source', 'weekly_downloads',

    def setUp(self):
        """Build the per-test state: transforms, indexer and add-on 3615."""
        super(TestAddonIndexer, self).setUp()
        # Queryset transforms that _extract() applies before extraction.
        queryset_transforms = (attach_tags, attach_translations)
        indexer = AddonIndexer()
        # pk 3615 presumably comes from the 'base/addon_3615' fixture listed
        # on the class.
        addon = Addon.objects.get(pk=3615)

        self.transforms = queryset_transforms
        self.indexer = indexer
        self.addon = addon

    def expected_fields(cls, include_nullable=True):
        Returns a list of fields we expect to be present in the mapping and
        in the extraction method.

        Should be updated whenever you change the mapping to add/remove fields.
        # Fields that can not be directly compared with the property of the
        # same name on the Addon instance, either because the property doesn't
        # exist on the model, or it has a different name, or the value we need
        # to store in ES differs from the one in the db.
        complex_fields = [
            'app', 'appversion', 'boost', 'category', 'current_beta_version',
            'current_version', 'description', 'has_eula', 'has_privacy_policy',
            'has_theme_rereview', 'has_version', 'listed_authors', 'name',
            'name_sort', 'platforms', 'previews', 'public_stats', 'ratings',
            'summary', 'tags',

        # Fields that need to be present in the mapping, but might be skipped
        # for extraction because they can be null.
        nullable_fields = ['persona']

        # For each translated field that needs to be indexed, we store one
        # version for each language-specific analyzer we have.
        _indexed_translated_fields = ('name', 'description', 'summary')
        analyzer_fields = list(chain.from_iterable(
            [['%s_%s' % (field, analyzer) for analyzer in SEARCH_ANALYZER_MAP]
             for field in _indexed_translated_fields]))

        # It'd be annoying to hardcode `analyzer_fields`, so we generate it,
        # but to make sure the test is correct we still do a simple check of
        # the length to make sure we properly flattened the list.
        assert len(analyzer_fields) == (len(SEARCH_ANALYZER_MAP) *

        # Each translated field that we want to return to the API.
        raw_translated_fields = [
            '%s_translations' % field for field in
            ['name', 'description', 'homepage', 'summary', 'support_email',

        # Return a list with the base fields and the dynamic ones added.
        fields = (cls.simple_fields + complex_fields + analyzer_fields +
        if include_nullable:
            fields += nullable_fields
        return fields

    def test_mapping(self):
        """Check the field set and index flags of the indexer's mapping."""
        doctype = self.indexer.get_doctype_name()
        assert doctype

        properties = self.indexer.get_mapping()[doctype]['properties']

        # The mapping must contain exactly the fields expected_fields()
        # lists: nothing unexpected, nothing missing.
        assert set(properties.keys()) == set(self.expected_fields())

        # default_locale and the raw translations must not be indexed.
        assert properties['default_locale']['index'] == 'no'
        name_translations_props = properties['name_translations']['properties']
        assert name_translations_props['lang']['index'] == 'no'
        assert name_translations_props['string']['index'] == 'no'

        # Everything under 'persona' is only returned to the API as-is, so
        # none of its sub-fields may be indexed either.
        for persona_field in properties['persona']['properties'].values():
            assert persona_field['index'] == 'no'

        # current_version needs a non-empty nested mapping with a fixed set
        # of keys.
        assert properties['current_version']['properties']
        version_props = properties['current_version']['properties']
        assert set(version_props.keys()) == {
            'id', 'compatible_apps', 'files', 'reviewed', 'version'}

        # Same check one level deeper, for the files of current_version.
        file_props = version_props['files']['properties']
        assert set(file_props.keys()) == {
            'id', 'created', 'filename', 'hash', 'platform', 'size', 'status'}

    def _extract(self):
        """Re-fetch ``self.addon`` with the transforms applied and extract it.

        The add-on is loaded again through ``Addon.unfiltered`` with every
        callable in ``self.transforms`` applied to the queryset. The fresh
        instance replaces ``self.addon`` and the document built by
        ``self.indexer.extract_document()`` for it is returned.
        """
        # Restrict the queryset to the add-on under test. An empty id list
        # would match no rows and the [0] lookup below would raise IndexError.
        qs = Addon.unfiltered.filter(id__in=[self.addon.pk]).no_cache()
        for transform in self.transforms:
            qs = qs.transform(transform)
        self.addon = list(qs)[0]
        return self.indexer.extract_document(self.addon)

    def test_extract_attributes(self):
        extracted = self._extract()

        # Like test_mapping() above, but for the extraction process:
        # Make sure the method does not return fields we did not expect to be
        # present, or omitted fields we want.
        assert set(extracted.keys()) == set(

        # Check base fields values. Other tests below check the dynamic ones.
        for field_name in self.simple_fields:
            assert extracted[field_name] == getattr(self.addon, field_name)

        assert extracted['app'] == []
        assert extracted['appversion'] == {
                'min': 2000000200100L,
                'max': 4000000200100L,
                'max_human': '4.0',
                'min_human': '2.0',
class TestAddonIndexer(TestCase):
    fixtures = ['base/users', 'base/addon_3615']

    # The base list of fields we expect to see in the mapping/extraction.
    # This only contains the fields for which we use the value directly,
    # see expected_fields() for the rest.
    simple_fields = [

    def setUp(self):
        """Prepare the transforms, the indexer and the add-on under test."""
        super(TestAddonIndexer, self).setUp()
        # Applied in order to the queryset built by _extract().
        transforms = (attach_tags, attach_translations)
        self.transforms = transforms
        self.indexer = AddonIndexer()
        # pk 3615 presumably comes from the 'base/addon_3615' fixture listed
        # on the class.
        fixture_addon = Addon.objects.get(pk=3615)
        self.addon = fixture_addon

    def expected_fields(cls, include_nullable=True):
        Returns a list of fields we expect to be present in the mapping and
        in the extraction method.

        Should be updated whenever you change the mapping to add/remove fields.
        # Fields that can not be directly compared with the property of the
        # same name on the Addon instance, either because the property doesn't
        # exist on the model, or it has a different name, or the value we need
        # to store in ES differs from the one in the db.
        complex_fields = [

        # Fields that need to be present in the mapping, but might be skipped
        # for extraction because they can be null.
        nullable_fields = ['persona']

        # For each translated field that needs to be indexed, we store one
        # version for each language-specific analyzer we have.
        _indexed_translated_fields = ('name', 'description', 'summary')
        analyzer_fields = list(
                '%s_l10n_%s' % (field, analyzer)
                for analyzer in SEARCH_ANALYZER_MAP
            ] for field in _indexed_translated_fields]))

        # It'd be annoying to hardcode `analyzer_fields`, so we generate it,
        # but to make sure the test is correct we still do a simple check of
        # the length to make sure we properly flattened the list.
        assert len(analyzer_fields) == (len(SEARCH_ANALYZER_MAP) *

        # Each translated field that we want to return to the API.
        raw_translated_fields = [
            '%s_translations' % field for field in [
                'name', 'description', 'developer_comments', 'homepage',
                'summary', 'support_email', 'support_url'

        # Return a list with the base fields and the dynamic ones added.
        fields = (cls.simple_fields + complex_fields + analyzer_fields +
        if include_nullable:
            fields += nullable_fields
        return fields

    def test_mapping(self):
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Make sure the get_mapping() method does not return fields we did
        # not expect to be present, or omitted fields we want.
        assert set(mapping_properties.keys()) == set(self.expected_fields())

        # Make sure default_locale and translated fields are not indexed.
        assert mapping_properties['default_locale']['index'] is False
        name_translations = mapping_properties['name_translations']
        assert name_translations['properties']['lang']['index'] is False
        assert name_translations['properties']['string']['index'] is False

        # Make sure nothing inside 'persona' is indexed, it's only there to be
        # returned back to the API directly.
        for field in mapping_properties['persona']['properties'].values():
            assert field['index'] is False

        # Make sure current_version mapping is set.
        assert mapping_properties['current_version']['properties']
        version_mapping = mapping_properties['current_version']['properties']
        expected_version_keys = ('id', 'compatible_apps', 'files', 'reviewed',
        assert set(version_mapping.keys()) == set(expected_version_keys)

        # Make sure files mapping is set inside current_version.
        files_mapping = version_mapping['files']['properties']
        expected_file_keys = ('id', 'created', 'filename', 'hash',
                              'is_webextension', 'is_restart_required',
                              'is_mozilla_signed_extension', 'platform',
                              'size', 'status', 'strict_compatibility',
        assert set(files_mapping.keys()) == set(expected_file_keys)

    def test_index_setting_boolean(self):
        """Make sure that the `index` setting is a true/false boolean.

        Old versions of ElasticSearch allowed 'no' and 'yes' strings,
        this changed with ElasticSearch 5.x.
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        assert all(
            isinstance(prop['index'], bool)
            for prop in mapping_properties.values() if 'index' in prop)

        # Make sure our version_mapping is setup correctly too.
        props = mapping_properties['current_version']['properties']

        assert all(
            isinstance(prop['index'], bool) for prop in props.values()
            if 'index' in prop)

        # As well as for current_version.files
        assert all(
            isinstance(prop['index'], bool)
            for prop in props['files']['properties'].values()
            if 'index' in prop)

    def _extract(self):
        """Reload the add-on through the transforms and return its document.

        Builds an ``Addon.unfiltered`` queryset for the add-on under test,
        applies each callable from ``self.transforms``, stores the reloaded
        instance on ``self.addon`` and returns what
        ``self.indexer.extract_document()`` produces for it.
        """
        # The filter has to name the add-on's pk: with an empty id list the
        # queryset is always empty and indexing it with [0] raises IndexError.
        qs = Addon.unfiltered.filter(id__in=[self.addon.pk]).no_cache()
        for transform in self.transforms:
            qs = qs.transform(transform)
        self.addon = list(qs)[0]
        return self.indexer.extract_document(self.addon)

    def test_extract_attributes(self):
        extracted = self._extract()

        # Like test_mapping() above, but for the extraction process:
        # Make sure the method does not return fields we did not expect to be
        # present, or omitted fields we want.
        assert set(extracted.keys()) == set(

        # Check base fields values. Other tests below check the dynamic ones.
        for field_name in self.simple_fields:
            assert extracted[field_name] == getattr(self.addon, field_name)

        assert extracted['app'] == []
        assert extracted['boost'] == self.addon.average_daily_users**.2 * 4
        assert extracted['category'] == [1, 22, 71]  # From fixture.
        assert extracted['current_version']
        assert extracted['has_theme_rereview'] is None
        assert extracted['latest_unlisted_version'] is None
        assert extracted['listed_authors'] == [{
            'name': u'55021 التطب',
            'id': 55021,
            'username': '******',
            'is_public': True
        assert extracted['platforms'] == []
        assert extracted['ratings'] == {
            'average': self.addon.average_rating,
            'count': self.addon.total_ratings,
            'text_count': self.addon.text_ratings_count,
        assert extracted['tags'] == []
        assert extracted['has_eula'] is True
        assert extracted['has_privacy_policy'] is True
        assert extracted['is_featured'] is False

    def test_extract_is_featured(self):
        """A featured add-on must be extracted with ``is_featured`` True."""
        # NOTE(review): nothing visible here attaches this collection to
        # self.addon or marks it as featured -- confirm the setup is complete.
        collection = collection_factory()
        assert self.addon.is_featured()
        document = self._extract()
        featured_flag = document['is_featured']
        assert featured_flag is True

    def test_extract_featured_for(self):
        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [{
            'application': [],
            'locales': [None]

        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [{
            'application': [],
            'locales': [None, 'fr']

        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [{
            'application': [],
            'locales': [None, 'fr']
        }, {
            'application': [],
            'locales': ['de-DE']

    def test_extract_eula_privacy_policy(self):
        """Without a EULA or privacy policy both extracted flags are False."""
        # Remove eula.
        self.addon.eula_id = None
        # Empty privacy policy should not be considered.
        self.addon.privacy_policy_id = ''
        # _extract() re-fetches the add-on through a fresh queryset, so the
        # changes above have to be persisted or they are silently discarded.
        self.addon.save()
        extracted = self._extract()

        assert extracted['has_eula'] is False
        assert extracted['has_privacy_policy'] is False

    def test_extract_no_current_version(self):
        """An add-on with no current version extracts ``current_version`` as None."""
        # The fixture add-on has a current version, so it must be removed
        # first; otherwise the assertion below cannot hold.
        self.addon.current_version.delete()
        extracted = self._extract()

        assert extracted['current_version'] is None

    def test_extract_version_and_files(self):
        version = self.addon.current_version

        unlisted_version = version_factory(
                'is_webextension': True,
        # Give one of the versions some webext permissions to test that.
            permissions=['bookmarks', 'random permission'])
        extracted = self._extract()

        assert extracted['current_version']
        assert extracted['current_version']['id'] ==
        # Because strict_compatibility is False, the max version we record in
        # the index is an arbitrary super high version.
        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100L,
                'max': 9999000000200100,
                'max_human': '4.0',
                'min_human': '2.0',
# Example #5
class TestAddonIndexer(TestCase):
    fixtures = ['base/users', 'base/addon_3615']

    # The base list of fields we expect to see in the mapping/extraction.
    # This only contains the fields for which we use the value directly,
    # see expected_fields() for the rest.
    simple_fields = [
        'average_daily_users', 'bayesian_rating', 'contributions', 'created',
        'default_locale', 'guid', 'hotness', 'icon_hash', 'icon_type', 'id',
        'is_disabled', 'is_experimental', 'last_updated', 'modified',
        'public_stats', 'requires_payment', 'slug', 'status', 'type',
        'view_source', 'weekly_downloads',

    def setUp(self):
        """Set up the transforms, the indexer and the add-on used by the tests."""
        super(TestAddonIndexer, self).setUp()
        # _extract() applies these to its queryset, in this order.
        self.transforms = tuple([attach_tags, attach_translations])
        addon_indexer = AddonIndexer()
        self.indexer = addon_indexer
        # pk 3615 presumably comes from the 'base/addon_3615' fixture listed
        # on the class.
        addon_pk = 3615
        self.addon = Addon.objects.get(pk=addon_pk)

    def expected_fields(cls, include_nullable=True):
        Returns a list of fields we expect to be present in the mapping and
        in the extraction method.

        Should be updated whenever you change the mapping to add/remove fields.
        # Fields that can not be directly compared with the property of the
        # same name on the Addon instance, either because the property doesn't
        # exist on the model, or it has a different name, or the value we need
        # to store in ES differs from the one in the db.
        complex_fields = [
            'app', 'boost', 'category', 'colors', 'current_version',
            'description', 'featured_for', 'has_eula', 'has_privacy_policy',
            'is_featured', 'listed_authors', 'name',
            'platforms', 'previews', 'public_stats', 'ratings', 'summary',

        # Fields that need to be present in the mapping, but might be skipped
        # for extraction because they can be null.
        nullable_fields = ['persona']

        # For each translated field that needs to be indexed, we store one
        # version for each language-specific analyzer we have.
        _indexed_translated_fields = ('name', 'description', 'summary')
        analyzer_fields = list(chain.from_iterable(
            [['%s_l10n_%s' % (field, analyzer) for analyzer
             in SEARCH_ANALYZER_MAP] for field in _indexed_translated_fields]))

        # It'd be annoying to hardcode `analyzer_fields`, so we generate it,
        # but to make sure the test is correct we still do a simple check of
        # the length to make sure we properly flattened the list.
        assert len(analyzer_fields) == (len(SEARCH_ANALYZER_MAP) *

        # Each translated field that we want to return to the API.
        raw_translated_fields = [
            '%s_translations' % field for field in
            ['name', 'description', 'developer_comments', 'homepage',
             'summary', 'support_email', 'support_url']]

        # Return a list with the base fields and the dynamic ones added.
        fields = (cls.simple_fields + complex_fields + analyzer_fields +
        if include_nullable:
            fields += nullable_fields
        return fields

    def test_mapping(self):
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Make sure the get_mapping() method does not return fields we did
        # not expect to be present, or omitted fields we want.
        assert set(mapping_properties.keys()) == set(self.expected_fields())

        # Make sure default_locale and translated fields are not indexed.
        assert mapping_properties['default_locale']['index'] is False
        name_translations = mapping_properties['name_translations']
        assert name_translations['properties']['lang']['index'] is False
        assert name_translations['properties']['string']['index'] is False

        # Make sure nothing inside 'persona' is indexed, it's only there to be
        # returned back to the API directly.
        for field in mapping_properties['persona']['properties'].values():
            assert field['index'] is False

        # Make sure current_version mapping is set.
        assert mapping_properties['current_version']['properties']
        version_mapping = mapping_properties['current_version']['properties']
        expected_version_keys = (
            'id', 'compatible_apps', 'files', 'license',
            'release_notes_translations', 'reviewed', 'version')
        assert set(version_mapping.keys()) == set(expected_version_keys)

        # Make sure files mapping is set inside current_version.
        files_mapping = version_mapping['files']['properties']
        expected_file_keys = (
            'id', 'created', 'filename', 'hash', 'is_webextension',
            'is_restart_required', 'is_mozilla_signed_extension', 'platform',
            'size', 'status', 'strict_compatibility',
        assert set(files_mapping.keys()) == set(expected_file_keys)

    def test_index_setting_boolean(self):
        """Make sure that the `index` setting is a true/false boolean.

        Old versions of ElasticSearch allowed 'no' and 'yes' strings,
        this changed with ElasticSearch 5.x.
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        assert all(
            isinstance(prop['index'], bool)
            for prop in mapping_properties.values()
            if 'index' in prop)

        # Make sure our version_mapping is setup correctly too.
        props = mapping_properties['current_version']['properties']

        assert all(
            isinstance(prop['index'], bool)
            for prop in props.values() if 'index' in prop)

        # As well as for current_version.files
        assert all(
            isinstance(prop['index'], bool)
            for prop in props['files']['properties'].values()
            if 'index' in prop)

    def _extract(self):
        """Reload ``self.addon`` with the transforms applied and extract it.

        The add-on is fetched again from ``Addon.unfiltered``, each callable
        in ``self.transforms`` is applied to the queryset, the reloaded
        instance is stored on ``self.addon`` and the result of
        ``self.indexer.extract_document()`` for it is returned.
        """
        # Look the add-on up by its pk. An empty id list never matches, which
        # would leave nothing for the [0] lookup below and raise IndexError.
        qs = Addon.unfiltered.filter(id__in=[self.addon.pk])
        for transform in self.transforms:
            qs = qs.transform(transform)
        self.addon = list(qs)[0]
        return self.indexer.extract_document(self.addon)

    def test_extract_attributes(self):
        extracted = self._extract()

        # Like test_mapping() above, but for the extraction process:
        # Make sure the method does not return fields we did not expect to be
        # present, or omitted fields we want.
        assert set(extracted.keys()) == set(

        # Check base fields values. Other tests below check the dynamic ones.
        for field_name in self.simple_fields:
            assert extracted[field_name] == getattr(self.addon, field_name)

        assert extracted['app'] == []
        assert extracted['boost'] == self.addon.average_daily_users ** .2 * 4
        assert extracted['category'] == [1, 22, 71]  # From fixture.
        assert extracted['current_version']
        assert extracted['listed_authors'] == [
            {'name': u'55021 التطب', 'id': 55021, 'username': '******',
             'is_public': True}]
        assert extracted['platforms'] == []
        assert extracted['ratings'] == {
            'average': self.addon.average_rating,
            'count': self.addon.total_ratings,
            'text_count': self.addon.text_ratings_count,
        assert extracted['tags'] == []
        assert extracted['has_eula'] is True
        assert extracted['has_privacy_policy'] is True
        assert extracted['is_featured'] is False
        assert extracted['colors'] is None

    def test_extract_is_featured(self):
        """``is_featured`` in the extracted document mirrors a featured add-on."""
        # NOTE(review): the visible code creates a collection but never links
        # it to self.addon or features it -- confirm the setup is complete.
        collection = collection_factory()
        assert self.addon.is_featured()
        extracted_document = self._extract()
        is_featured = extracted_document['is_featured']
        assert is_featured is True

    def test_extract_featured_for(self):
        collection = collection_factory()
        featured_collection = FeaturedCollection.objects.create(
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None]}]

        # Even if the locale for the FeaturedCollection is an empty string
        # instead of None, we extract it as None so that it keeps its special
        # meaning.
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None]}]

        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None, 'fr']}]

        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None, 'fr']},
            {'application': [], 'locales': ['de-DE']}]

    def test_extract_eula_privacy_policy(self):
        """A missing EULA and an empty privacy policy both extract as False."""
        # Remove eula.
        self.addon.eula_id = None
        # Empty privacy policy should not be considered.
        self.addon.privacy_policy_id = ''
        # Persist the changes: _extract() loads the add-on again from a new
        # queryset, so unsaved attribute changes would be lost.
        self.addon.save()
        extracted = self._extract()

        assert extracted['has_eula'] is False
        assert extracted['has_privacy_policy'] is False

    def test_extract_no_current_version(self):
        """``current_version`` is extracted as None once the add-on has none."""
        # Remove the fixture add-on's current version before extracting;
        # without this step the add-on still has one and the check fails.
        self.addon.current_version.delete()
        extracted = self._extract()

        assert extracted['current_version'] is None

    def test_extract_version_and_files(self):
        version = self.addon.current_version
        # Make the version a webextension and add a bunch of things to it to
        # test different scenarios.
            version=version,, is_webextension=True)
        del version.all_files
        version.license = License.objects.create(
            name=u'My licensé',
            file=file_, permissions=['bookmarks', 'random permission']
        ) for file_ in version.all_files]

        # Now we can run the extraction and start testing.
        extracted = self._extract()

        assert extracted['current_version']
        assert extracted['current_version']['id'] ==
        # Because strict_compatibility is False, the max version we record in
        # the index is an arbitrary super high version.
        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100,
                'max': 9999000000200100,
                'max_human': '4.0',
                'min_human': '2.0',
        assert extracted['current_version']['license'] == {
            'builtin': 0,
            'name_translations': [{'lang': u'en-US', 'string': u'My licensé'}],
            'url': u''
        assert extracted['current_version']['release_notes_translations'] == [
            {'lang': 'en-US', 'string': u'Fix for an important bug'},
            {'lang': 'fr', 'string': u"Quelque chose en fran\xe7ais."
                                     u"\n\nQuelque chose d'autre."},
        assert extracted['current_version']['reviewed'] == version.reviewed
        assert extracted['current_version']['version'] == version.version
        for index, file_ in enumerate(version.all_files):
            extracted_file = extracted['current_version']['files'][index]
            assert extracted_file['id'] ==
            assert extracted_file['created'] == file_.created
            assert extracted_file['filename'] == file_.filename
            assert extracted_file['hash'] == file_.hash
            assert extracted_file['is_webextension'] == file_.is_webextension
            assert extracted_file['is_restart_required'] == (
            assert extracted_file['is_mozilla_signed_extension'] == (
            assert extracted_file['platform'] == file_.platform
            assert extracted_file['size'] == file_.size
            assert extracted_file['status'] == file_.status
            assert extracted_file['webext_permissions_list'] == [
                'bookmarks', 'random permission']

        assert set(extracted['platforms']) == set([,

    def test_version_compatibility_with_strict_compatibility_enabled(self):
        """Check the extracted compatible_apps range with strict compatibility.

        The expected 'max' here is the 4.0 version integer itself.
        """
        # NOTE(review): `version` is never used afterwards and nothing visible
        # in this method enables strict compatibility -- a setup statement
        # appears to have been lost after this line; confirm against upstream.
        version = self.addon.current_version
        extracted = self._extract()

        # NOTE(review): the dict literal below is never closed, so this method
        # does not parse as written. The min/max entries were presumably
        # nested under an application key that is no longer visible.
        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100,
                'max': 4000000200100,
                'max_human': '4.0',
                'min_human': '2.0',

    def test_extract_translations(self):
        """Check extraction of translated fields, including empty values.

        Covers the raw ``*_translations`` lists, the per-analyzer
        ``*_l10n_*`` fields and the plain ``name``/``summary`` fallbacks.
        """
        translations_name = {
            'en-US': u'Name in ënglish',
            'es': u'Name in Español',
            'it': None,  # Empty name should be ignored in extract.
        }
        translations_description = {
            'en-US': u'Description in ënglish',
            'es': u'Description in Español',
            'fr': '',  # Empty description should be ignored in extract.
            'it': '<script>alert(42)</script>',
        }
        self.addon.summary_id = None
        self.addon.name = translations_name
        self.addon.description = translations_description
        # Persist the changes before extracting.
        # NOTE(review): assumes _extract() reloads the add-on from the
        # database, in which case unsaved changes would be invisible to it.
        self.addon.save()
        extracted = self._extract()
        assert extracted['name_translations'] == [
            {'lang': u'en-US', 'string': translations_name['en-US']},
            {'lang': u'es', 'string': translations_name['es']},
        ]
        assert extracted['description_translations'] == [
            {'lang': u'en-US', 'string': translations_description['en-US']},
            {'lang': u'es', 'string': translations_description['es']},
            {'lang': u'it', 'string': '&lt;script&gt;alert(42)&lt;/script&gt;'}
        ]
        assert extracted['name_l10n_english'] == [translations_name['en-US']]
        assert extracted['name_l10n_spanish'] == [translations_name['es']]
        assert extracted['name_l10n_italian'] == []
        assert (extracted['description_l10n_english'] ==
                [translations_description['en-US']])
        assert (extracted['description_l10n_spanish'] ==
                [translations_description['es']])
        assert extracted['description_l10n_french'] == []
        # NOTE(review): the expected value was missing here. The escaped form
        # is used to match the 'it' entry of description_translations above;
        # confirm against the indexer.
        assert (extracted['description_l10n_italian'] ==
                ['&lt;script&gt;alert(42)&lt;/script&gt;'])
        assert extracted['summary_l10n_english'] == []
        # The non-l10n fields are fallbacks in the addon's default locale, they
        # need to always contain a string.
        assert extracted['name'] == u'Name in ënglish'
        assert extracted['summary'] == ''

    def test_extract_translations_engb_default(self):
        """Make sure we do correctly extract things for en-GB default locale"""
        with self.activate('en-GB'):
            kwargs = {
                'status': amo.STATUS_PUBLIC,
                'type': amo.ADDON_EXTENSION,
                'default_locale': 'en-GB',
                'name': 'Banana Bonkers',
                'description': u'Let your browser eat your bananas',
                'summary': u'Banana Summary',
            }

            self.addon = Addon.objects.create(**kwargs)
            # Add Spanish translations on top of the en-GB defaults.
            self.addon.name = {'es': u'Banana Bonkers espanole'}
            self.addon.description = {
                'es': u'Deje que su navegador coma sus plátanos'}
            self.addon.summary = {'es': u'resumen banana'}
            # Persist the Spanish values before extracting.
            # NOTE(review): assumes _extract() reloads the add-on from the
            # database, in which case unsaved changes would be invisible.
            self.addon.save()

        extracted = self._extract()

        assert extracted['name_translations'] == [
            {'lang': u'en-GB', 'string': 'Banana Bonkers'},
            {'lang': u'es', 'string': u'Banana Bonkers espanole'},
        ]
        assert extracted['description_translations'] == [
            {'lang': u'en-GB', 'string': u'Let your browser eat your bananas'},
            {
                'lang': u'es',
                'string': u'Deje que su navegador coma sus plátanos'
            },
        ]
        # The en-GB strings are expected under the 'english' l10n fields.
        assert extracted['name_l10n_english'] == ['Banana Bonkers']
        assert extracted['name_l10n_spanish'] == [u'Banana Bonkers espanole']
        assert (extracted['description_l10n_english'] ==
                [u'Let your browser eat your bananas'])
        assert (extracted['description_l10n_spanish'] ==
                [u'Deje que su navegador coma sus plátanos'])

    def test_extract_persona(self):
        """Check the document extracted for a persona add-on."""
        # Override self.addon with a persona.
        self.addon = addon_factory(persona_id=42, type=amo.ADDON_PERSONA)
        # It's a Persona, there should not be any files attached, and the
        # indexer should not care.
        assert self.addon.current_version.files.count() == 0

        persona = self.addon.persona
        persona.header = u'myheader.jpg'
        persona.footer = u'myfooter.jpg'
        persona.accentcolor = u'336699'
        # NOTE(review): the next line assigns to a string literal and does not
        # parse. It looks like two statements fused together, the second
        # having lost its target (presumably another persona attribute).
        persona.textcolor = u'f0f0f0' = u'Me-me-me-Myself'
        persona.display_username = u'my-username'
        persona.popularity = 1000
        # NOTE(review): nothing visible here saves `persona` before the
        # extraction -- confirm whether a save step was lost.
        extracted = self._extract()
        # For personas the usage numbers and the boost are all expected to be
        # derived from persona.popularity.
        assert extracted['average_daily_users'] == persona.popularity
        assert extracted['weekly_downloads'] == persona.popularity * 7
        assert extracted['boost'] == float(persona.popularity ** .2) * 4
        assert extracted['persona']['accentcolor'] == persona.accentcolor
        # We need the author that will go in theme_data here, which is
        # persona.display_username. NOTE(review): the original comment was cut
        # off after "not"; it presumably named another persona attribute.
        assert extracted['persona']['author'] == persona.display_username
        assert extracted['persona']['header'] == persona.header
        assert extracted['persona']['footer'] == persona.footer
        assert extracted['persona']['is_new'] is False  # It has a persona_id.
        assert extracted['persona']['textcolor'] == persona.textcolor

        # Personas are always considered compatible with every platform, and
        # almost all versions of all apps.
        assert extracted['platforms'] == []
        # NOTE(review): the dict below is never closed and holds two groups of
        # min/max entries with duplicate keys. They were presumably nested
        # under two application keys that are no longer visible; this does not
        # parse as written.
        assert extracted['current_version']['compatible_apps'] == {
                'max': 9999000000200100,
                'max_human': '9999',
                'min': 11000000200100,
                'min_human': '11.0',
                'max': 9999000000200100,
                'max_human': '9999',
                'min': 4000000200100,
                'min_human': '4.0',
        # A persona created with persona_id=0 is reported as new.
        self.addon = addon_factory(persona_id=0, type=amo.ADDON_PERSONA)
        extracted = self._extract()
        assert extracted['persona']['is_new'] is True  # No persona_id.

    def test_extract_previews(self):
        """Check previews are extracted in position order with raw captions.

        The preview with position=2 is created first on purpose: the one with
        position=1 must still come first in the extracted list.
        """
        second_preview = Preview.objects.create(
            addon=self.addon, position=2,
            caption={'en-US': u'My câption', 'fr': u'Mön tîtré'},
            sizes={'thumbnail': [199, 99], 'image': [567, 780]})
        first_preview = Preview.objects.create(addon=self.addon, position=1)
        extracted = self._extract()
        assert extracted['previews']
        assert len(extracted['previews']) == 2
        # The right-hand side of the two 'id' comparisons was missing; each
        # one is restored from the object used by the neighbouring assertions.
        assert extracted['previews'][0]['id'] == first_preview.pk
        assert extracted['previews'][0]['modified'] == first_preview.modified
        assert extracted['previews'][0]['caption_translations'] == []
        assert extracted['previews'][0]['sizes'] == first_preview.sizes == {}
        assert extracted['previews'][1]['id'] == second_preview.pk
        assert extracted['previews'][1]['modified'] == second_preview.modified
        assert extracted['previews'][1]['caption_translations'] == [
            {'lang': 'en-US', 'string': u'My câption'},
            {'lang': 'fr', 'string': u'Mön tîtré'}]
        assert extracted['previews'][1]['sizes'] == second_preview.sizes == {
            'thumbnail': [199, 99], 'image': [567, 780]}

        # Only the raw translations dict should exist, since we don't need
        # to search against preview captions.
        assert 'caption' not in extracted['previews'][0]
        assert 'caption' not in extracted['previews'][1]

    def test_extract_previews_statictheme(self):
        """Check previews and colors extracted from VersionPreview objects."""
        # NOTE(review): neither create() call below links the preview to a
        # version or add-on, and nothing visible turns self.addon into a
        # static theme -- those setup pieces appear to have been lost.
        current_preview = VersionPreview.objects.create(
            colors=[{'h': 1, 's': 2, 'l': 3, 'ratio': 0.9}],
            sizes={'thumbnail': [56, 78], 'image': [91, 234]}, position=1)
        second_preview = VersionPreview.objects.create(
            sizes={'thumbnail': [12, 34], 'image': [56, 78]}, position=2)
        extracted = self._extract()
        assert extracted['previews']
        assert len(extracted['previews']) == 2
        assert 'caption_translations' not in extracted['previews'][0]
        # NOTE(review): the right-hand side of this comparison is missing
        # (presumably current_preview's primary key); it does not parse.
        assert extracted['previews'][0]['id'] ==
        assert extracted['previews'][0]['modified'] == current_preview.modified
        assert extracted['previews'][0]['sizes'] == current_preview.sizes == {
            'thumbnail': [56, 78], 'image': [91, 234]}
        assert 'caption_translations' not in extracted['previews'][1]
        # NOTE(review): right-hand side missing here as well (presumably
        # second_preview's primary key).
        assert extracted['previews'][1]['id'] ==
        assert extracted['previews'][1]['modified'] == second_preview.modified
        assert extracted['previews'][1]['sizes'] == second_preview.sizes == {
            'thumbnail': [12, 34], 'image': [56, 78]}

        # Make sure we extract colors from the first preview.
        assert extracted['colors'] == [{'h': 1, 's': 2, 'l': 3, 'ratio': 0.9}]

    def test_extract_staticthemes_somehow_no_previews(self):
        """Check extraction copes with a static theme that has no previews."""
        # Extracting a static theme with no previews should not fail.
        # NOTE(review): nothing visible here makes self.addon a static theme;
        # a setup statement appears to have been lost.

        extracted = self._extract()
        # NOTE(review): the right-hand side of this comparison is missing
        # (presumably the add-on's primary key); it does not parse as written.
        assert extracted['id'] ==
        assert extracted['previews'] == []
        assert extracted['colors'] is None
# Example #6
class TestAddonIndexer(TestCase):
    fixtures = ['base/users', 'base/addon_3615']

    # The base list of fields we expect to see in the mapping/extraction.
    # This only contains the fields for which we use the value directly,
    # see expected_fields() for the rest.
    # NOTE(review): test_mapping() reads mapping_properties['default_locale']
    # right after asserting that the mapping keys equal expected_fields(), but
    # 'default_locale' is listed nowhere -- confirm whether it belongs here.
    simple_fields = [
        'average_daily_users', 'bayesian_rating', 'contributions', 'created',
        'guid', 'hotness', 'icon_type', 'id', 'is_disabled', 'is_experimental',
        'last_updated', 'modified', 'public_stats', 'requires_payment', 'slug',
        'status', 'type', 'view_source', 'weekly_downloads',
    ]

    def setUp(self):
        """Prepare the add-on, the indexer and the queryset transforms."""
        super(TestAddonIndexer, self).setUp()
        # Add-on 3615 is presumably provided by the 'base/addon_3615' fixture.
        self.addon = Addon.objects.get(pk=3615)
        self.indexer = AddonIndexer()
        # Applied to the queryset by _extract() before the document is built.
        self.transforms = (attach_tags, attach_translations)

    @classmethod
    def expected_fields(cls, include_nullable=True):
        """
        Returns a list of fields we expect to be present in the mapping and
        in the extraction method.

        Should be updated whenever you change the mapping to add/remove fields.

        :param include_nullable: when True (the default), also include the
            fields that may be skipped at extraction because they can be null.
        :returns: list of field names. It may contain duplicates
            ('public_stats' is both a simple and a complex field); callers
            compare it as a set.
        """
        # Fields that can not be directly compared with the property of the
        # same name on the Addon instance, either because the property doesn't
        # exist on the model, or it has a different name, or the value we need
        # to store in ES differs from the one in the db.
        complex_fields = [
            'app', 'boost', 'category', 'current_beta_version',
            'current_version', 'description', 'featured_for',
            'has_eula', 'has_privacy_policy',
            'has_theme_rereview', 'is_featured', 'latest_unlisted_version',
            'listed_authors', 'name', 'name_sort', 'platforms', 'previews',
            'public_stats', 'ratings', 'summary', 'tags',
        ]

        # Fields that need to be present in the mapping, but might be skipped
        # for extraction because they can be null.
        nullable_fields = ['persona']

        # For each translated field that needs to be indexed, we store one
        # version for each language-specific analyzer we have.
        _indexed_translated_fields = ('name', 'description', 'summary')
        analyzer_fields = list(chain.from_iterable(
            [['%s_l10n_%s' % (field, analyzer) for analyzer
             in SEARCH_ANALYZER_MAP] for field in _indexed_translated_fields]))

        # It'd be annoying to hardcode `analyzer_fields`, so we generate it,
        # but to make sure the test is correct we still do a simple check of
        # the length to make sure we properly flattened the list.
        assert len(analyzer_fields) == (len(SEARCH_ANALYZER_MAP) *
                                        len(_indexed_translated_fields))

        # Each translated field that we want to return to the API.
        raw_translated_fields = [
            '%s_translations' % field for field in
            ['name', 'description', 'developer_comments', 'homepage',
             'summary', 'support_email', 'support_url']]

        # Return a list with the base fields and the dynamic ones added.
        # The concatenation builds a new list, so the `+=` below never
        # mutates cls.simple_fields.
        fields = (cls.simple_fields + complex_fields + analyzer_fields +
                  raw_translated_fields)
        if include_nullable:
            fields += nullable_fields
        return fields

    def test_mapping(self):
        """Check the field set and the index flags of the generated mapping."""
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Make sure the get_mapping() method does not return fields we did
        # not expect to be present, or omitted fields we want.
        assert set(mapping_properties.keys()) == set(self.expected_fields())

        # Make sure default_locale and translated fields are not indexed.
        assert mapping_properties['default_locale']['index'] is False
        name_translations = mapping_properties['name_translations']
        assert name_translations['properties']['lang']['index'] is False
        assert name_translations['properties']['string']['index'] is False

        # Make sure nothing inside 'persona' is indexed, it's only there to be
        # returned back to the API directly.
        for field in mapping_properties['persona']['properties'].values():
            assert field['index'] is False

        # Make sure current_version mapping is set.
        assert mapping_properties['current_version']['properties']
        version_mapping = mapping_properties['current_version']['properties']
        expected_version_keys = (
            'id', 'compatible_apps', 'files', 'reviewed', 'version')
        assert set(version_mapping.keys()) == set(expected_version_keys)

        # Make sure files mapping is set inside current_version.
        # NOTE(review): this tuple was left unterminated; it is closed after
        # the last visible key. Confirm against the mapping that no trailing
        # key was lost.
        files_mapping = version_mapping['files']['properties']
        expected_file_keys = (
            'id', 'created', 'filename', 'hash', 'is_webextension',
            'is_restart_required', 'is_mozilla_signed_extension', 'platform',
            'size', 'status', 'strict_compatibility',
        )
        assert set(files_mapping.keys()) == set(expected_file_keys)

    def test_index_setting_boolean(self):
        """Make sure that the `index` setting is a true/false boolean.

        Old versions of ElasticSearch allowed 'no' and 'yes' strings,
        this changed with ElasticSearch 5.x.
        """
        doc_name = self.indexer.get_doctype_name()
        assert doc_name

        mapping_properties = self.indexer.get_mapping()[doc_name]['properties']

        # Top-level properties: only those that define 'index' are checked.
        assert all(
            isinstance(prop['index'], bool)
            for prop in mapping_properties.values()
            if 'index' in prop)

        # Make sure our version_mapping is setup correctly too.
        props = mapping_properties['current_version']['properties']

        assert all(
            isinstance(prop['index'], bool)
            for prop in props.values() if 'index' in prop)

        # As well as for current_version.files
        assert all(
            isinstance(prop['index'], bool)
            for prop in props['files']['properties'].values()
            if 'index' in prop)

    def _extract(self):
        """Reload self.addon through the transformed queryset and extract it.

        The add-on is fetched again by primary key, with every transform in
        ``self.transforms`` applied to the queryset, and ``self.addon`` is
        replaced by the freshly loaded instance.

        :returns: the result of ``self.indexer.extract_document()`` for the
            reloaded add-on.
        """
        # Filter on the current add-on: an empty id list would match no rows
        # and make the [0] lookup below raise IndexError.
        qs = Addon.unfiltered.filter(id__in=[self.addon.pk]).no_cache()
        for t in self.transforms:
            qs = qs.transform(t)
        self.addon = list(qs)[0]
        return self.indexer.extract_document(self.addon)

    def test_extract_attributes(self):
        """Check the extracted document's keys and its directly-copied values.

        Translations, versions, previews and other dynamic fields are covered
        by the dedicated tests further down.
        """
        extracted = self._extract()

        # Like test_mapping() above, but for the extraction process:
        # Make sure the method does not return fields we did not expect to be
        # present, or omitted fields we want.
        # NOTE(review): the argument of set() was missing. Nullable fields are
        # excluded because expected_fields() describes them as possibly
        # skipped at extraction; confirm against the indexer.
        assert set(extracted.keys()) == set(
            self.expected_fields(include_nullable=False))

        # Check base fields values. Other tests below check the dynamic ones.
        for field_name in self.simple_fields:
            assert extracted[field_name] == getattr(self.addon, field_name)

        assert extracted['app'] == []
        assert extracted['boost'] == self.addon.average_daily_users ** .2 * 4
        assert extracted['category'] == [1, 22, 71]  # From fixture.
        assert extracted['current_beta_version'] is None
        assert extracted['current_version']
        assert extracted['has_theme_rereview'] is None
        assert extracted['latest_unlisted_version'] is None
        # NOTE(review): the '******' username looks like a redacted value;
        # restore the real fixture username before relying on this assertion.
        assert extracted['listed_authors'] == [
            {'name': u'55021 التطب', 'id': 55021, 'username': '******',
             'is_public': True}]
        assert extracted['platforms'] == []
        assert extracted['ratings'] == {
            'average': self.addon.average_rating,
            'count': self.addon.total_ratings,
            'text_count': self.addon.text_ratings_count,
        }
        assert extracted['tags'] == []
        assert extracted['has_eula'] is True
        assert extracted['has_privacy_policy'] is True
        assert extracted['is_featured'] is False

    def test_extract_is_featured(self):
        """Check that a featured add-on is extracted with is_featured True."""
        # NOTE(review): `collection` is created but never used, and nothing
        # visible here features self.addon -- the statements linking the
        # add-on to the collection appear to have been lost, so the next
        # assertion is not expected to hold as written.
        collection = collection_factory()
        assert self.addon.is_featured()
        extracted = self._extract()
        assert extracted['is_featured'] is True

    def test_extract_featured_for(self):
        """Check the featured_for entries extracted as featuring is added."""
        # NOTE(review): in each of the three steps below `collection` is
        # created but never used; the statements that feature self.addon for
        # a given application/locale appear to have been lost, so the
        # differing expectations cannot be explained by the visible code.

        # Step 1: expected to be featured with no specific locale.
        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None]}]

        # Step 2: 'fr' is expected to be added to the same entry.
        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None, 'fr']}]

        # Step 3: 'de-DE' is expected to show up as a separate entry.
        collection = collection_factory()
        extracted = self._extract()
        assert extracted['featured_for'] == [
            {'application': [], 'locales': [None, 'fr']},
            {'application': [], 'locales': ['de-DE']}]

    def test_extract_eula_privacy_policy(self):
        """Check has_eula/has_privacy_policy are False when both are unset."""
        # Remove eula.
        self.addon.eula_id = None
        # Empty privacy policy should not be considered.
        self.addon.privacy_policy_id = ''
        # _extract() reloads the add-on from the database, so the changes
        # above have to be saved to have any effect on the extraction.
        self.addon.save()
        extracted = self._extract()

        assert extracted['has_eula'] is False
        assert extracted['has_privacy_policy'] is False

    def test_extract_no_current_version(self):
        """Check current_version is extracted as None when there is none."""
        # NOTE(review): nothing visible here removes the add-on's current
        # version, while test_extract_attributes() expects the same add-on to
        # have one -- a setup statement appears to have been lost.
        extracted = self._extract()

        assert extracted['current_version'] is None

    def test_extract_version_and_files(self):
        version = self.addon.current_version
        current_beta_version = version_factory(
                'status': amo.STATUS_BETA,
                'is_webextension': True,
                'is_mozilla_signed_extension': True,
        # Give one of the versions some webext permissions to test that.
            permissions=['bookmarks', 'random permission']
        unlisted_version = version_factory(
            addon=self.addon, channel=amo.RELEASE_CHANNEL_UNLISTED)
        extracted = self._extract()

        assert extracted['current_version']
        assert extracted['current_version']['id'] ==
        # Because strict_compatibility is False, the max version we record in
        # the index is an arbitrary super high version.
        assert extracted['current_version']['compatible_apps'] == {
                'min': 2000000200100L,
                'max': 9999000000200100,
                'max_human': '4.0',
                'min_human': '2.0',