Example #1
    def tearDown(self):
        # Cleanup to remove these from the index.
        self.app1.delete()
        self.app2.delete()
        unindex_webapps([self.app1.id, self.app2.id])
        # Required to purge the suggestions data structure. In Lucene, a
        # document is not deleted from a segment, just marked as deleted.
        WebappIndexer.get_es().optimize(WebappIndexer.get_index(),
                                        only_expunge_deletes=True)
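If several ES test cases need this cleanup, the optimize call can be factored into a small helper. A minimal sketch reusing the exact call from the example above (the helper name is ours, not the project's):

def expunge_deleted_docs(indexer=WebappIndexer):
    # Same call as in tearDown above: expunge soft-deleted Lucene
    # documents so the suggestions data structure stops serving them.
    indexer.get_es().optimize(indexer.get_index(),
                              only_expunge_deletes=True)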
Example #2
def index_webapp(ids, **kw):
    index = kw.pop('index', None) or ALIAS
    sys.stdout.write('Indexing %s apps' % len(ids))

    qs = Webapp.indexing_transformer(Webapp.uncached.filter(id__in=ids))

    docs = [WebappIndexer.extract_document(obj.id, obj=obj) for obj in qs]
    WebappIndexer.bulk_index(docs, es=ES, index=index)
Example #3
def index_webapps(ids, **kw):
    task_log.info('Indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))

    index = kw.pop('index', WebappIndexer.get_index())
    # Note: If reindexing is currently occurring, `get_indices` will return
    # more than one index.
    indices = get_indices(index)

    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    qs = Webapp.indexing_transformer(Webapp.uncached.filter(id__in=ids))
    for obj in qs:
        doc = WebappIndexer.extract_document(obj.id, obj)
        for idx in indices:
            WebappIndexer.index(doc, id_=obj.id, es=es, index=idx)
Example #4
def index_webapp(ids, **kw):
    index = kw.pop('index', None) or ALIAS
    sys.stdout.write('Indexing %s apps' % len(ids))

    qs = Webapp.indexing_transformer(Webapp.uncached.filter(id__in=ids))

    docs = []
    for obj in qs:
        try:
            docs.append(WebappIndexer.extract_document(obj.id, obj=obj))
        except Exception:  # a bare except would also swallow KeyboardInterrupt
            sys.stdout.write('Failed to index obj: {0}'.format(obj.id))

    WebappIndexer.bulk_index(docs, es=ES, index=index)
Example #5
def index_webapp(ids, **kw):
    index = kw.pop('index', None) or ALIAS
    sys.stdout.write('Indexing %s apps' % len(ids))

    qs = Webapp.indexing_transformer(
        Webapp.with_deleted.no_cache().filter(id__in=ids))

    docs = []
    for obj in qs:
        try:
            docs.append(WebappIndexer.extract_document(obj.id, obj=obj))
        except Exception:  # a bare except would also swallow KeyboardInterrupt
            sys.stdout.write('Failed to index obj: {0}'.format(obj.id))

    WebappIndexer.bulk_index(docs, es=ES, index=index)
Example #6
def create_index(new_index, alias, settings):
    """Creates a mapping for the new index.

    - new_index: new index name
    - alias: alias name
    - settings: a dictionary of settings

    """
    sys.stdout.write('Create the mapping for index %r, alias: %r' %
                     (new_index, alias))

    # Update settings with mapping.
    settings = {
        'settings': settings,
        'mappings': WebappIndexer.get_mapping(),
    }

    # Create index and mapping.
    try:
        ES.create_index(new_index, settings)
    except pyelasticsearch.exceptions.IndexAlreadyExistsError:
        raise CommandError('New index [%s] already exists' % new_index)

    # Don't return until the cluster health is green (waits up to 30s by default).
    ES.health(new_index, wait_for_status='green', wait_for_relocating_shards=0)
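A hypothetical invocation, mirroring the reindex command later in this listing; ALIAS, timestamp_index, and the settings keys all come from that command rather than from this function:

new_index = timestamp_index(ALIAS)
create_index(new_index, ALIAS, {
    'analysis': WebappIndexer.get_analysis(),
    'number_of_replicas': 0,
    'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
})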
Example #7
    def test_mapping_properties(self):
        # Spot check a few of the key properties.
        mapping = WebappIndexer.get_mapping()
        keys = mapping['webapp']['properties'].keys()
        for k in ('id', 'app_slug', 'category', 'default_locale',
                  'description', 'device', 'features', 'name', 'status'):
            ok_(k in keys, 'Key %s not found in mapping properties' % k)
Example #8
class ESTestCase(TestCase):
    """Base class for tests that require elasticsearch."""
    # ES is slow to set up so this uses class setup/teardown. That happens
    # outside Django transactions so be careful to clean up afterwards.
    test_es = True
    mock_es = False
    exempt_from_fixture_bundling = True  # ES doesn't support bundling (yet?)

    @classmethod
    def setUpClass(cls):
        if not settings.RUN_ES_TESTS:
            raise SkipTest('ES disabled')
        cls.es = amo.search.get_es(timeout=settings.ES_TIMEOUT)

        # The ES settings are set before we call super()
        # because indexing may already be occurring in parent classes.
        for key, index in settings.ES_INDEXES.items():
            if not index.startswith('test_'):
                settings.ES_INDEXES[key] = 'test_%s_%s' % (
                    'mkt' if settings.MARKETPLACE else 'amo', index)

        super(ESTestCase, cls).setUpClass()
        try:
            cls.es.cluster_health()
        except Exception, e:
            e.args = tuple([u'%s (it looks like ES is not running, '
                            'try starting it or set RUN_ES_TESTS=False)'
                            % e.args[0]] + list(e.args[1:]))
            raise

        cls._SEARCH_ANALYZER_MAP = amo.SEARCH_ANALYZER_MAP
        amo.SEARCH_ANALYZER_MAP = {
            'english': ['en-us'],
            'spanish': ['es'],
        }

        for index in set(settings.ES_INDEXES.values()):
            # Get the index that's pointed to by the alias.
            try:
                indices = cls.es.get_alias(index)
                index = indices[0]
            except IndexError:
                # There's no alias, just use the index.
                print 'Found no alias for %s.' % index
            except (pyes.IndexMissingException,
                    pyelasticsearch.ElasticHttpNotFoundError):
                pass

            # Remove any alias as well.
            try:
                cls.es.delete_index(index)
            except (pyes.IndexMissingException,
                    pyelasticsearch.ElasticHttpNotFoundError) as exc:
                print 'Could not delete index %r: %s' % (index, exc)

        addons.search.setup_mapping()
        stats.search.setup_indexes()
        if settings.MARKETPLACE:
            WebappIndexer.setup_mapping()
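setUpClass stashes the original analyzer map in cls._SEARCH_ANALYZER_MAP; a matching tearDownClass (assumed here, it is not part of the excerpt) would restore it:

    @classmethod
    def tearDownClass(cls):
        # Restore the analyzer map even if the parent teardown fails.
        try:
            amo.SEARCH_ANALYZER_MAP = cls._SEARCH_ANALYZER_MAP
        finally:
            super(ESTestCase, cls).tearDownClass()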
Example #9
    def get(self, request, *args, **kwargs):
        limit = int(request.GET.get('limit', 5))  # query params arrive as strings
        es_query = {
            'apps': {
                'completion': {
                    'field': 'name_suggest',
                    'size': limit
                },
                'text': request.GET.get('q', '').strip()
            }
        }

        results = S(WebappIndexer).get_es().send_request(
            'GET', [WebappIndexer.get_index(), '_suggest'], body=es_query)

        if 'apps' in results:
            data = results['apps'][0]['options']
        else:
            data = []
        serializer = self.get_serializer(data)
        # This returns a JSON list. Usually that is a bad idea for security
        # reasons, but since the response contains no user-specific data and
        # is fully anonymous, it is safe here.
        return HttpResponse(json.dumps(serializer.data),
                            content_type='application/x-rocketbar+json')
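The '_suggest' request above only works if the mapping declares name_suggest as a completion field. A sketch of what that property plausibly looks like; this is an assumption about WebappIndexer.get_mapping(), not a copy of it:

properties = {
    'name_suggest': {
        'type': 'completion',  # Elasticsearch completion-suggester field
    },
}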
Example #10
def unindex_webapps(ids, **kw):
    task_log.info('Un-indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))

    index = kw.pop('index', WebappIndexer.get_index())
    # Note: If reindexing is currently occurring, `get_indices` will return
    # more than one index.
    indices = get_indices(index)

    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    for id_ in ids:
        for idx in indices:
            try:
                WebappIndexer.unindex(id_=id_, es=es, index=idx)
            except ElasticHttpNotFoundError:
                # Ignore if it's not there.
                task_log.info(
                    u'[Webapp:%s] Unindexing app but not found in index' % id_)
Example #11
    def handle(self, *args, **kwargs):
        index = WebappIndexer.get_index()
        doctype = WebappIndexer.get_mapping_type_name()
        es = WebappIndexer.get_es()

        apps = Webapp.objects.values_list('id', flat=True)

        missing_ids = []

        for app in apps:
            try:
                es.get(index, doctype, app, fields='id')
            except ElasticHttpNotFoundError:
                # App doesn't exist in our index, add it to `missing_ids`.
                missing_ids.append(app)

        if missing_ids:
            sys.stdout.write('Adding %s doc(s) to the index.' %
                             len(missing_ids))
            index_webapps.delay(missing_ids)
        else:
            sys.stdout.write('No docs missing from index.')
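The loop above issues one GET per app, which gets slow for a large catalog. A batched sketch using an ids query instead, assuming the same pyelasticsearch-style client and the chunked helper used elsewhere in this listing:

        found = set()
        for chunk in chunked(list(apps), 500):
            res = es.search({'query': {'ids': {'values': chunk}},
                             'fields': ['id'], 'size': len(chunk)},
                            index=index, doc_type=doctype)
            found.update(int(hit['_id']) for hit in res['hits']['hits'])
        missing_ids = [app for app in apps if app not in found]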
Example #12
def run_indexing(index):
    """Index the objects.

    - index: name of the index

    Note: Our ES docs are about 5k each. Chunking by 100 sends ~500kb
    of data to ES at a time.

    TODO: Use celery chords here to parallelize these indexing chunks. This
          requires celery 3 (bug 825938).

    """
    sys.stdout.write('Indexing apps into index: %s' % index)

    qs = WebappIndexer.get_indexable()
    for chunk in chunked(list(qs), 100):
        index_webapp(chunk, index=index)
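A sketch of the TODO above, fanning the chunks out with a celery 3 chord. It assumes index_webapp has been promoted to a celery task; output_summary is borrowed from the reindex command later in this listing:

from celery import chord, group

header = group(index_webapp.si(chunk, index=index)
               for chunk in chunked(list(qs), 100))
chord(header)(output_summary.si())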
Example #13
    def test_q_num_requests(self):
        es = WebappIndexer.get_es()
        orig_search = es.search
        es.counter = 0

        def monkey_search(*args, **kwargs):
            es.counter += 1
            return orig_search(*args, **kwargs)

        es.search = monkey_search

        res = self.client.get(self.url, data={'q': 'something'})
        eq_(res.status_code, 200)
        obj = res.json['objects'][0]
        eq_(obj['slug'], self.webapp.app_slug)

        # Verify only one search call was made.
        eq_(es.counter, 1)

        es.search = orig_search
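One caveat in the test above: if any assertion fails, es.search is never restored and the patched method leaks into later tests. A safer sketch of the same test using try/finally:

    def test_q_num_requests(self):
        es = WebappIndexer.get_es()
        orig_search = es.search
        es.counter = 0

        def monkey_search(*args, **kwargs):
            es.counter += 1
            return orig_search(*args, **kwargs)

        es.search = monkey_search
        try:
            res = self.client.get(self.url, data={'q': 'something'})
            eq_(res.status_code, 200)
            eq_(res.json['objects'][0]['slug'], self.webapp.app_slug)
            # Verify only one search call was made.
            eq_(es.counter, 1)
        finally:
            es.search = orig_search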
Example #14
    def test_q_num_requests_no_results(self):
        es = WebappIndexer.get_es()
        orig_search = es.search
        es.counter = 0

        def monkey_search(*args, **kwargs):
            es.counter += 1
            return orig_search(*args, **kwargs)

        es.search = monkey_search

        res = self.client.get(self.url, data={'q': 'noresults'})
        eq_(res.status_code, 200)
        eq_(res.json['meta']['total_count'], 0)
        eq_(len(res.json['objects']), 0)

        # Verify only one search call was made.
        eq_(es.counter, 1)

        es.search = orig_search
Example #15
    def test_q_num_requests(self):
        es = WebappIndexer.get_es()
        orig_search = es.search
        es.counter = 0

        def monkey_search(*args, **kwargs):
            es.counter += 1
            return orig_search(*args, **kwargs)

        es.search = monkey_search

        res = self.client.get(self.url, data={"q": "something"})
        eq_(res.status_code, 200)
        eq_(res.json["meta"]["total_count"], 1)
        eq_(len(res.json["objects"]), 1)
        obj = res.json["objects"][0]
        eq_(obj["slug"], self.webapp.app_slug)

        # Verify only one search call was made.
        eq_(es.counter, 1)

        es.search = orig_search
Example #16
    def test_mapping(self):
        mapping = WebappIndexer.get_mapping()
        eq_(mapping.keys(), ['webapp'])
        eq_(mapping['webapp']['_all'], {'enabled': False})
        eq_(mapping['webapp']['_boost'], {'name': '_boost', 'null_value': 1.0})
Example #17
    def _get_doc(self):
        qs = Webapp.indexing_transformer(
            Webapp.uncached.filter(id__in=[self.app.pk]))
        obj = qs[0]
        return obj, WebappIndexer.extract_document(obj.pk, obj)
Example #18
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates a new index and indexes all objects,
        then points the alias to this new index when finished.
        """
        if not settings.MARKETPLACE:
            raise CommandError('This command affects only marketplace and '
                               'should be run under Marketplace settings.')

        force = kwargs.get('force', False)
        prefix = kwargs.get('prefix', '')

        if database_flagged() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            unflag_database()

        # The list of indexes currently aliased by `ALIAS`.
        try:
            aliases = ES.aliases(ALIAS).keys()
        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None
        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.get_settings(old_index).get(old_index,
                                                    {}).get('settings', {}))
            except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards', settings.ES_DEFAULT_NUM_SHARDS)

        # Flag the database.
        chain = flag_database.si(new_index, old_index, ALIAS)

        # Create the index and mapping.
        #
        # Note: We set num_replicas=0 here to decrease load while re-indexing.
        # In a later step we increase it which results in a more efficient bulk
        # copy in Elasticsearch.
        # For ES < 0.90 we manually enable compression.
        chain |= create_index.si(
            new_index, ALIAS, {
                'analysis': WebappIndexer.get_analysis(),
                'number_of_replicas': 0,
                'number_of_shards': num_shards,
                'store.compress.tv': True,
                'store.compress.stored': True,
                'refresh_interval': '-1'
            })

        # Index all the things!
        chain |= run_indexing.si(new_index)

        # After indexing we optimize the index, adjust settings, and point the
        # alias to the new index.
        chain |= update_alias.si(new_index, old_index, ALIAS, {
            'number_of_replicas': num_replicas,
            'refresh_interval': '5s'
        })

        # Unflag the database.
        chain |= unflag_database.si()

        # Delete the old index, if any.
        if old_index:
            chain |= delete_index.si(old_index)

        chain |= output_summary.si()

        self.stdout.write('\nNew index and indexing tasks all queued up.\n')
        os.environ['FORCE_INDEXING'] = '1'
        try:
            chain.apply_async()
        finally:
            del os.environ['FORCE_INDEXING']
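The chain assembled step by step above is, read linearly, one pipeline. An illustrative sketch where index_settings and alias_settings stand in for the two dicts built above (the optional delete_index step is omitted):

        flow = (flag_database.si(new_index, old_index, ALIAS)
                | create_index.si(new_index, ALIAS, index_settings)
                | run_indexing.si(new_index)
                | update_alias.si(new_index, old_index, ALIAS, alias_settings)
                | unflag_database.si()
                | output_summary.si())
        flow.apply_async()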
Example #19
    def test_mapping_type_name(self):
        eq_(WebappIndexer.get_mapping_type_name(), 'webapp')
Example #20
    def test_model(self):
        eq_(WebappIndexer.get_model(), Webapp)
Example #21
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates a new index and indexes all objects,
        then points the alias to this new index when finished.
        """
        force = kwargs.get('force', False)
        prefix = kwargs.get('prefix', '')

        if is_reindexing_mkt() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            unflag_database()

        # The list of indexes currently aliased by `ALIAS`.
        try:
            aliases = ES.aliases(ALIAS).keys()
        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
            aliases = []
        old_index = aliases[0] if aliases else None
        # Create a new index, using the index name with a timestamp.
        new_index = timestamp_index(prefix + ALIAS)

        # See how the index is currently configured.
        if old_index:
            try:
                s = (ES.get_settings(old_index).get(old_index, {})
                                               .get('settings', {}))
            except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
                s = {}
        else:
            s = {}

        num_replicas = s.get('number_of_replicas',
                             settings.ES_DEFAULT_NUM_REPLICAS)
        num_shards = s.get('number_of_shards', settings.ES_DEFAULT_NUM_SHARDS)

        # Flag the database.
        chain = flag_database.si(new_index, old_index, ALIAS)

        # Create the index and mapping.
        #
        # Note: We set num_replicas=0 here to decrease load while re-indexing.
        # In a later step we increase it which results in a more efficient bulk
        # copy in Elasticsearch.
        # For ES < 0.90 we manually enable compression.
        chain |= create_index.si(new_index, ALIAS, {
            'analysis': WebappIndexer.get_analysis(),
            'number_of_replicas': 0, 'number_of_shards': num_shards,
            'store.compress.tv': True, 'store.compress.stored': True,
            'refresh_interval': '-1'})

        # Index all the things!
        chain |= run_indexing.si(new_index)

        # After indexing we optimize the index, adjust settings, and point the
        # alias to the new index.
        chain |= update_alias.si(new_index, old_index, ALIAS, {
            'number_of_replicas': num_replicas, 'refresh_interval': '5s'})

        # Unflag the database.
        chain |= unflag_database.si()

        # Delete the old index, if any.
        if old_index:
            chain |= delete_index.si(old_index)

        chain |= output_summary.si()

        self.stdout.write('\nNew index and indexing tasks all queued up.\n')
        os.environ['FORCE_INDEXING'] = '1'
        try:
            chain.apply_async()
        finally:
            del os.environ['FORCE_INDEXING']
Example #22
    def setUp(self):
        super(TestFixupCommand, self).setUp()
        self.index = WebappIndexer.get_index()
        self.doctype = WebappIndexer.get_mapping_type_name()
        self.es = WebappIndexer.get_es()
        self.app = Webapp.objects.get(pk=337141)
Example #23
    def test_index(self):
        with self.settings(ES_INDEXES={'webapp': 'apps'}):
            eq_(WebappIndexer.get_index(), 'apps')