def setUp(self):
    """Build the fixtures for the API tests.

    Creates a user with a filled-in profile, registers an inactive
    ``APIApp`` and then reindexes all profiles and flushes ES.
    """
    self.auto_user = user()

    # Fill in the profile that hangs off the freshly created user.
    profile = self.auto_user.userprofile
    memberships = ((Group, 'nice guy'), (Skill, 'python'), (Language, 'Greek'))
    for kind, value in memberships:
        profile.set_membership(kind, value)
    profile.ircname = 'foobar'
    profile.country = 'gr'
    profile.region = 'Attika'
    profile.city = 'Athens'
    profile.save()

    self.auto_user.first_name = 'Foo'
    self.auto_user.last_name = 'Bar'
    self.auto_user.save()

    # Create an APP.
    # NOTE(review): self.mozillian is not assigned in this method --
    # presumably provided by a base class; confirm.
    self.app = APIApp.objects.create(
        name='test_app',
        description='Foo',
        owner=self.mozillian,
        is_mozilla_app=False,
        is_active=False,
    )

    # Make the new data searchable before any test runs.
    index_all_profiles()
    get_es().flush(refresh=True)
def index(cls, document, id=None, bulk=False, force_insert=False, index=None):
    """Wrapper around pyes.ES.index.

    :arg document: the document handed to ES.
    :arg id: forwarded to ES as the document ``id``.
    :arg bulk: forwarded to ES as ``bulk``.
    :arg force_insert: forwarded to ES as ``force_insert``.
    :arg index: index to write to; when falsy, ``cls._get_index()``
        is used instead.
    """
    es = elasticutils.get_es()
    target_index = index or cls._get_index()
    es.index(
        document,
        index=target_index,
        doc_type=cls._meta.db_table,
        id=id,
        bulk=bulk,
        force_insert=force_insert,
    )
def index(cls, document, id=None, bulk=False, force_insert=False):
    """Wrapper around pyes.ES.index.

    The document is written to ``cls._get_index()`` with the model's
    database table name as the doc type.

    :arg document: the document handed to ES.
    :arg id: forwarded to ES as the document ``id``.
    :arg bulk: forwarded to ES as ``bulk``.
    :arg force_insert: forwarded to ES as ``force_insert``.
    """
    es = elasticutils.get_es()
    es.index(
        document,
        index=cls._get_index(),
        doc_type=cls._meta.db_table,
        id=id,
        bulk=bulk,
        force_insert=force_insert,
    )
def refresh(self, timesleep=0):
    """Run the generated tasks, then refresh the write index.

    :arg timesleep: forwarded to the ES ``refresh`` call.
    """
    write_index = es_utils.WRITE_INDEX

    # A refresh means we want the index ready to be queried, so it is
    # almost always right to run all the generated tasks first.
    generate_tasks()

    es = get_es()
    es.refresh(write_index, timesleep=timesleep)
def setUpClass(cls):
    """Run the ES and :class:`TestCase` class setups to add some data.

    Afterwards the index is flushed and refreshed so the data is
    searchable.
    """
    estestcase.ESTestCase.setUpClass()
    TestCase.setUpClass()

    es = get_es()
    es.flush(refresh=True)
def refresh(self, run_tasks=True):
    """Refresh the write index and wait for a 'yellow' health status.

    :arg run_tasks: when true, run all the generated tasks before
        refreshing.
    """
    write_index = es_utils.WRITE_INDEX

    if run_tasks:
        # A refresh means we want the index ready to be queried, so
        # it is almost always right to run the generated tasks first.
        generate_tasks()

    get_es().refresh(write_index)
    get_es().health(wait_for_status='yellow')
def delete_index_if_exists(index):
    """Delete the specified index, tolerating a missing one.

    :arg index: The name of the index to delete.
    """
    try:
        es = get_es()
        es.delete_index(index)
    except ElasticHttpNotFoundError:
        # The index doesn't exist, so there is nothing to delete.
        return
def setUpClass(cls):
    """Set up the class and flag it to be skipped when ES is unusable.

    ``cls.skipme`` is set to True when the ``ES_URLS`` setting is
    missing or empty, or when the ES health check raises one of
    ``es_utils.ES_EXCEPTIONS``.
    """
    super(ElasticTestCase, cls).setUpClass()

    # The default matters: without it getattr() raises AttributeError
    # for a missing setting instead of letting us skip the tests.
    if not getattr(settings, 'ES_URLS', None):
        cls.skipme = True
        return

    # Try to connect to ES and if it fails, skip ElasticTestCases.
    try:
        get_es().health()
    except es_utils.ES_EXCEPTIONS:
        cls.skipme = True
def handle(self, *args, **options):
    """Build a fresh timestamped index and repoint the default alias.

    A new ``index_<timestamp>`` index is created with the mappings of
    ``self.MAPPINGS``, populated, and then the alias named by
    ``settings.ES_INDEXES['default']`` is switched over to it. The
    previously aliased index (if any) is deleted.
    """
    url = options['url']
    es = get_es(urls=[url]) if url else get_es()

    # We define some custom analyzers that our mappings can use.
    mappings = {}
    index_settings = {'mappings': mappings, 'settings': get_analyzers()}

    # Retrieve the mappings for the index-enabled models.
    for mapping_class in self.MAPPINGS:
        type_name = mapping_class.get_mapping_type_name()
        mappings[type_name] = mapping_class.get_mapping()

    # Create a new index and fill it.
    new_index = 'index_%s' % (int(time.time()))
    es.indices.create(new_index, body=index_settings)
    self.index(new_index)

    # The default index name, (we will use as an alias).
    index_name = settings.ES_INDEXES['default']

    # Check if we have a current index carrying the alias.
    old_index = None
    current = es.indices.get_aliases(name=index_name)
    for candidate, info in current.iteritems():
        if info['aliases']:
            old_index = candidate

    # Change the alias to point to our new index, and remove the old index.
    self.stdout.write(
        'Changing alias "%s" from old index "%s" to new index "%s"'
        % (index_name, old_index, new_index))

    add_alias = {'add': {'index': new_index, 'alias': index_name}}
    if old_index:
        drop_alias = {'remove': {'index': old_index, 'alias': index_name}}
        es.indices.update_aliases({'actions': [drop_alias, add_alias]})
        es.indices.delete(old_index)
    else:
        if es.indices.exists(index_name):
            # Corner case: no alias named index_name exists, yet an
            # index with that name does. This only happens when the
            # index was already created (because of ES auto creation
            # features).
            es.indices.delete(index_name)
        es.indices.update_aliases({'actions': [add_alias]})

    # Finally re-index one more time, to pick up updates that were
    # written during our command. Note that models that do not use
    # the DeletedMixin will not work this way.
    self.index(index_name)
    self.unindex(index_name)
def test_remove_index(self):
    """Reindexing succeeds when a plain index sits where an alias should."""
    es = elasticutils.get_es()
    prefixes = ('test_amo', 'test_amo_stats')

    # Putting a test_amo index in the way: swap each alias for a real
    # index that carries the alias' name.
    for index in es.get_indices().keys():
        for prefix in prefixes:
            if not index.startswith(prefix + '-'):
                continue
            es.delete_alias(prefix, [index])
            es.delete_index(index)
            es.create_index(prefix)

    # Reindexing the first app.
    self.webapp.save()
    self.refresh()

    # Now doing a reindexation in a background process.
    command = [
        sys.executable,
        'manage.py',
        'reindex',
        '--prefix=test_',
        '--settings=%s' % self.settings,
    ]
    indexer = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               cwd=settings.ROOT)
    stdout, stderr = indexer.communicate()
    succeeded = 'Reindexation done' in stdout
    self.assertTrue(succeeded, stdout + '\n' + stderr)
def recreate_index(es=None):
    """Deletes index if it's there and creates a new one

    :arg es: ES object to create the index with; ``get_es()`` is
        called when none is given.
    """
    if es is None:
        es = get_es()

    from search.models import get_search_models

    per_table = [(model._meta.db_table, model.get_mapping())
                 for model in get_search_models()]
    merged_mapping = {
        SUMO_DOCTYPE: {'properties': merge_mappings(per_table)},
    }

    index = WRITE_INDEX
    delete_index(index)

    # There should be no mapping-conflict race here since the index
    # doesn't exist. Live indexing should just fail.

    # Create the index and the mappings in one call, so live indexing
    # can't slip a document in between the two and make ES infer a
    # bogus mapping (which it then rejects when the incompatible
    # explicit mapping arrives).
    es.create_index(index, settings={'mappings': merged_mapping})
def index_finance_total_by_currency(addons, **kw):
    """
    Bug 757581
    Total finance stats, currency breakdown.

    For every add-on with contributions, one document per distinct
    currency is indexed (unless already indexed) and the bulk queue is
    flushed. Any failure schedules a retry of the task and re-raises.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by currency for %s apps.' %
             len(addons))

    for addon in addons:
        # All contributions for the given add-on.
        contributions = Contribution.objects.filter(addon=addon, uuid=None)
        if not contributions.exists():
            continue

        # Distinct currencies seen for this add-on.
        for currency in set(contributions.values_list('currency', flat=True)):
            try:
                doc_id = ord_word('cur' + str(addon) + currency.lower())
                doc = search.get_finance_total(
                    contributions, addon, 'currency', currency=currency)
                if not already_indexed(Contribution, doc):
                    Contribution.index(doc, bulk=True, id=doc_id)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_by_currency.retry(args=[addons], exc=exc)
                raise
def setup_mkt_indexes(index=None, aliased=True):
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.

    :arg index: index name to use for every model; when falsy, each
        model's own ``_get_index()`` is used.
    :arg aliased: forwarded to ``create_es_index_if_missing``.
    """
    es = elasticutils.get_es()
    for model in [Contribution, InappPayment]:
        # Resolve into fresh locals on every pass. Rebinding ``index``
        # here would make each later model inherit the first model's
        # resolved index instead of falling back to its own.
        requested = index or model._get_index()
        target = create_es_index_if_missing(requested, aliased=aliased)

        mapping = {
            'properties': {
                'id': {'type': 'long'},
                'date': {'format': 'dateOptionalTime',
                         'type': 'date'},
                'count': {'type': 'long'},
                'revenue': {'type': 'double'},

                # Try to tell ES not to 'analyze' the field to querying with
                # hyphens and lowercase letters.
                'currency': {'type': 'string',
                             'index': 'not_analyzed'},
                'source': {'type': 'string',
                           'index': 'not_analyzed'},
                'inapp': {'type': 'string',
                          'index': 'not_analyzed'}
            }
        }

        es.put_mapping(model._meta.db_table, mapping, target)
def index_all_profiles():
    """Recreate both profile indexes and queue the indexing tasks.

    The 'default' and 'public' indexes are deleted and recreated with
    the ``UserProfileMappingType`` mapping, then one ``index_objects``
    subtask per index is dispatched as a ``TaskSet``.
    """
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    type_name = UserProfileMappingType.get_mapping_type_name()
    mappings = {'mappings': {type_name: UserProfileMappingType.get_mapping()}}

    for key in ('default', 'public'):
        name = settings.ES_INDEXES[key]
        # 400/404 are ignored so a missing index is not an error.
        es.indices.delete(index=name, ignore=[400, 404])
        es.indices.create(name, body=mappings)

    def _subtask(profile_ids, public):
        return index_objects.subtask(kwargs={
            'mapping_type': UserProfileMappingType,
            'ids': profile_ids,
            'chunk_size': 150,
            'public_index': public,
        })

    # mozillians index
    all_ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [_subtask(all_ids, False)]

    # public index
    public_ids = (UserProfile.objects.complete().public_indexable()
                  .privacy_level(PUBLIC).values_list('id', flat=True))
    ts.append(_subtask(public_ids, True))

    TaskSet(ts).apply_async()
def recreate_index(es=None):
    """Deletes index if it's there and creates a new one

    :arg es: ES object to create the index with; ``get_es()`` is
        called when none is given.
    """
    if es is None:
        es = get_es()

    from search.models import get_search_models

    table_mappings = []
    for model in get_search_models():
        table_mappings.append((model._meta.db_table, model.get_mapping()))
    properties = merge_mappings(table_mappings)
    merged_mapping = {SUMO_DOCTYPE: {'properties': properties}}

    index = WRITE_INDEX
    delete_index(index)

    # There should be no mapping-conflict race here since the index
    # doesn't exist. Live indexing should just fail.

    # The index and its mappings are created together so that live
    # indexing cannot index anything in between and have ES infer a
    # bogus mapping that conflicts with the explicit one.
    es.create_index(index, settings={'mappings': merged_mapping})
def index_installed_daily(ids, **kw):
    """
    Takes a list of Installed ids and uses its addon and date fields to index
    stats for that day.

    ids -- ids of mkt.webapps.Installed objects

    Each (addon, day) pair is processed once; any failure schedules a
    retry of the task and re-raises.
    """
    from mkt.webapps.models import Installed
    es = elasticutils.get_es()

    # Get Installed's
    queryset = Installed.objects.filter(id__in=set(ids))
    qs = queryset.order_by('-created').values('addon', 'created')
    log.info('[%s] Indexing %s installed counts for daily stats.' %
             (qs[0]['created'], len(qs)))

    # addon -> date -> marker for "already handled in this run".
    addons_dates = defaultdict(lambda: defaultdict(dict))
    for installed in qs:
        addon = installed['addon']
        date = installed['created'].strftime('%Y%m%d')

        try:
            if date not in addons_dates[addon]:
                doc_id = ord_word('ins' + str(addon) + str(date))
                doc = search.get_installed_daily(installed)
                if not already_indexed(Installed, doc):
                    Installed.index(doc, bulk=True, id=doc_id)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_installed_daily.retry(args=[ids], exc=exc)
            raise
def index_finance_total_by_src(addons, **kw):
    """
    Bug 758059
    Total finance stats, source breakdown.

    For every add-on with contributions, one document per distinct
    source is indexed (unless already indexed) and the bulk queue is
    flushed. Any failure schedules a retry of the task and re-raises.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by source for %s apps.' %
             len(addons))

    for addon in addons:
        # All contributions for the given add-on.
        contributions = Contribution.objects.filter(addon=addon, uuid=None)
        if not contributions.exists():
            continue

        # Distinct sources seen for this add-on.
        for source in set(contributions.values_list('source', flat=True)):
            try:
                doc_id = ord_word('src' + str(addon) + str(source))
                doc = search.get_finance_total(
                    contributions, addon, 'source', source=source)
                if not already_indexed(Contribution, doc):
                    Contribution.index(doc, bulk=True, id=doc_id)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_by_src.retry(args=[addons], exc=exc)
                raise
def index_all_profiles():
    """Recreate both profile indexes and queue chunked indexing tasks.

    The 'default' and 'public' indexes are deleted and recreated with
    the ``UserProfileMappingType`` mapping, then ``index_objects``
    subtasks (150 ids each) are dispatched for both as a ``TaskSet``.
    """
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    type_name = UserProfileMappingType.get_mapping_type_name()
    mappings = {'mappings': {type_name: UserProfileMappingType.get_mapping()}}

    for key in ('default', 'public'):
        name = settings.ES_INDEXES[key]
        # 400/404 are ignored so a missing index is not an error.
        es.indices.delete(index=name, ignore=[400, 404])
        es.indices.create(name, body=mappings)

    ids = UserProfile.objects.complete().values_list('id', flat=True)

    # mozillians index
    ts = []
    for chunk in chunked(sorted(ids), 150):
        ts.append(index_objects.subtask(
            args=[UserProfileMappingType, chunk, 150, False]))

    # public index
    # NOTE(review): this reuses the same unfiltered ids as the
    # mozillians index -- presumably index_objects filters by privacy
    # when the last argument is True; confirm.
    for chunk in chunked(sorted(ids), 150):
        ts.append(index_objects.subtask(
            args=[UserProfileMappingType, chunk, 150, True]))

    TaskSet(ts).apply_async()
def setup_mkt_indexes():
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.
    """
    # Not analyzed, so querying with hyphens and lowercase letters works.
    es = elasticutils.get_es()

    for model in (Contribution, InappPayment):
        create_es_index_if_missing(model._get_index())

        properties = {
            'id': {'type': 'long'},
            'date': {'format': 'dateOptionalTime', 'type': 'date'},
            'count': {'type': 'long'},
            'revenue': {'type': 'double'},
            'currency': {'type': 'string', 'index': 'not_analyzed'},
            'source': {'type': 'string', 'index': 'not_analyzed'},
            'inapp': {'type': 'string', 'index': 'not_analyzed'},
        }
        mapping = {'properties': properties}

        es.put_mapping(model._meta.db_table, mapping, model._get_index())
def __init__(self, *args, **kwargs):
    """Create the 'default' and 'public' indexes, then init the test case.

    HTTP 400 responses from the create calls are ignored.
    """
    from elasticutils.contrib.django import get_es

    es = get_es()
    for key in ('default', 'public'):
        es.indices.create(index=ES_INDEXES[key], ignore=400)

    super(TestCase, self).__init__(*args, **kwargs)
def setup_mkt_indexes(index=None, aliased=True):
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.

    :arg index: index name to use for every model; when falsy, each
        model's own ``_get_index()`` is used.
    :arg aliased: forwarded to ``create_es_index_if_missing``.
    """
    es = elasticutils.get_es()
    for model in [Contribution, InappPayment]:
        # Keep the ``index`` argument untouched: rebinding it inside
        # the loop would leak the first model's resolved index into
        # every later iteration.
        requested = index or model._get_index()
        target = create_es_index_if_missing(requested, aliased=aliased)

        mapping = {
            "properties": {
                "id": {"type": "long"},
                "date": {"format": "dateOptionalTime", "type": "date"},
                "count": {"type": "long"},
                "revenue": {"type": "double"},
                # Try to tell ES not to 'analyze' the field to querying with
                # hyphens and lowercase letters.
                "currency": {"type": "string", "index": "not_analyzed"},
                "source": {"type": "string", "index": "not_analyzed"},
                "inapp": {"type": "string", "index": "not_analyzed"},
            }
        }

        es.put_mapping(model._meta.db_table, mapping, target)
def handle(self, index_suffix, **options):
    """Repoint every registered model's alias at its suffixed index.

    :arg index_suffix: appended (after an underscore) to each alias
        name to form the name of the index the alias should point to.
    """
    suffix = '_' + index_suffix

    # alias name -> name of the index it should point to.
    indexes = {}
    for _name, model in polymorphic_indexable_registry.all_models.items():
        alias = model.get_index_name()
        target = alias + suffix
        indexes.setdefault(alias, target)

    es = get_es()
    alias_actions = []

    # Remove our aliases from whichever indexes currently carry them.
    for existing_index, info in es.indices.get_aliases().items():
        for alias in indexes:
            if "aliases" in info and alias in info["aliases"]:
                alias_actions.append(
                    {"remove": {"alias": alias, "index": existing_index}})

    # Add our new aliases.
    for alias, target in indexes.items():
        alias_actions.append({"add": {"alias": alias, "index": target}})

    es.indices.update_aliases(body=dict(actions=alias_actions))
def index_chunk(cls, id_list, es=None):
    """Index a chunk of documents.

    The ids are processed in batches of 200; each batch is extracted
    and sent with one ``bulk_index`` call.

    :arg cls: The MappingType class.
    :arg id_list: Iterable of ids of that MappingType to index.
    :arg es: The ES to use. Defaults to creating a new indexing ES.
    """
    if es is None:
        es = get_es()

    model = None
    for batch in chunked(id_list, 200):
        model = cls.get_model()
        documents = []
        for obj in model.uncached.filter(id__in=batch):
            documents.append(cls.extract_document(obj_id=obj.id, obj=obj))

        if documents:
            cls.bulk_index(documents, id_field='id', es=es)

        if settings.DEBUG:
            # Drop the recorded queries so memory doesn't balloon
            # when DEBUG=True.
            reset_queries()
def index(cls, document, id_=None, bulk=False, force_insert=False, es=None):
    """Adds or updates a document to the index

    :arg document: Python dict of key/value pairs representing
        the document

        .. Note::

           This must be serializable into JSON.

    :arg id_: the Django ORM model instance id---this is used to
        convert an ES search result back to the Django ORM model
        instance from the db. It should be an integer.
    :arg bulk: Whether or not this is part of a bulk indexing. If
        this is, you must provide an ES with the `es` argument,
        too.
    :arg force_insert: TODO
    :arg es: The ES to use. If you don't specify an ES, it'll
        use `elasticutils.contrib.django.get_es()`.

    :raises ValueError: if `bulk` is True, but `es` is None.

    TODO: add example.

    """
    if es is None:
        # Bulk indexing needs a caller-supplied ES.
        if bulk:
            raise ValueError('bulk is True, but es is None')
        es = get_es()

    index_name = cls.get_index()
    doc_type = cls.get_mapping_type()
    es.index(document, index=index_name, doc_type=doc_type, id=id_,
             bulk=bulk, force_insert=force_insert)
def index_chunk(cls, id_list, reraise=False, es=None):
    """Index a chunk of documents.

    The ids are processed in batches of 200; each batch is sent with
    one ``bulk_index`` call when at least one document was extracted.

    :arg cls: The MappingType class.
    :arg id_list: Iterable of ids of that MappingType to index.
    :arg reraise: False if you want errors to be swallowed and True
        if you want errors to be thrown.
    :arg es: The ES to use. Defaults to creating a new indexing ES.
    """
    if es is None:
        es = get_es()

    for batch in chunked(id_list, 200):
        extracted = []
        for obj in cls.get_model().objects.filter(id__in=batch):
            try:
                doc = cls.extract_document(obj_id=obj.id, obj=obj)
                extracted.append(doc)
            except Exception:
                # Log and carry on unless the caller asked to fail fast.
                log.exception('Unable to extract/index document (id: %d)',
                              obj.id)
                if reraise:
                    raise

        if extracted:
            cls.bulk_index(extracted, id_field='id', es=es)

        if settings.DEBUG:
            # Drop the recorded queries so memory doesn't balloon
            # when DEBUG=True.
            reset_queries()
def index_chunk(cls, chunk, reraise=False, es=None):
    """Index a chunk of documents.

    :arg cls: The MappingType class.
    :arg chunk: Iterable of ids of that MappingType to index.
    :arg reraise: False if you want errors to be swallowed and True
        if you want errors to be thrown.
    :arg es: The ES to use. Defaults to creating a new indexing ES.

    .. Note::

       This indexes all the documents in the chunk in one single bulk
       indexing call. Keep that in mind when you break your indexing
       task into chunks.

    """
    if es is None:
        es = get_es()

    extracted = []
    for doc_id in chunk:
        try:
            doc = cls.extract_document(doc_id)
            extracted.append(doc)
        except Exception:
            # Log and carry on unless the caller asked to fail fast.
            log.exception('Unable to extract/index document (id: %d)', doc_id)
            if reraise:
                raise

    cls.bulk_index(extracted, id_field='id', es=es)
def es_analyze(text, analyzer=None):
    """Returns analysis of text.

    :arg text: the text to analyze

    :arg analyzer: (optional) the analyzer to use. Defaults to snowball
        which is an English-settings analyzer.

    :returns: list of dicts each describing a token

    """
    es = get_es()
    index = get_index()
    if not analyzer:
        analyzer = 'snowball'

    # pyelasticsearch has no analyze call, so the request is issued
    # by hand through its internals. Rewrite this when moving to
    # elasticsearch-py.
    response = es.send_request('GET',
                               [index, '_analyze'],
                               query_params={'analyzer': analyzer},
                               body=text)
    return response['tokens']
def setup_class(cls):
    """Reset the test index and load five ``FakeModel`` documents.

    Sets ``cls.skip_tests`` and bails out when django cannot be
    imported; does nothing further when tests are already skipped.
    """
    super(QueryTest, cls).setup_class()
    if cls.skip_tests:
        return

    try:
        import django
    except ImportError:
        cls.skip_tests = True
        return

    from elasticutils.contrib.django import get_es
    es = get_es()

    try:
        es.delete_index_if_exists(cls.index_name)
    except pyes.exceptions.IndexMissingException:
        # TODO: No clue why this is throwing an IndexMissingException
        # because I thought the whole point of delete_index_if_exists
        # is that it _didn't_ throw an exception if the index was
        # missing.
        pass

    rows = (
        (1, 'bar', 'awesome', '2'),
        (2, 'barf', 'boring', '7'),
        (3, 'car', 'awesome', '5'),
        (4, 'duck', 'boat', '11'),
        (5, 'train car', 'awesome', '7'),
    )
    fixtures = [FakeModel(id=id_, foo=foo, tag=tag, width=width)
                for id_, foo, tag, width in rows]

    for data in fixtures:
        es.index(data.__dict__, cls.index_name, FakeModel._meta.db_table,
                 bulk=True, id=data.id)

    es.refresh()
def index_finance_daily_inapp(ids, **kw):
    """
    Similar to index_finance_daily, except for InappPayments.

    ids -- ids of mkt.stats.webapps.InappPayment objects

    Each (addon, in-app name, day) triple is processed once; any
    failure schedules a retry of the task and re-raises.
    """
    es = elasticutils.get_es()

    # Get contributions.
    payments = InappPayment.objects.filter(id__in=ids).order_by('created')
    qs = payments.values('name', 'config__addon', 'created')
    log.info('[%s] Indexing %s in-app payments for daily stats.' %
             (qs[0]['created'], len(ids)))

    # addon -> in-app name -> date -> marker for "already handled".
    addons_inapps_dates = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))

    for payment in qs:
        addon = payment['config__addon']
        inapp = payment['name']
        date = payment['created'].strftime('%Y%m%d')

        # Only the first payment seen for a given day gets indexed.
        if date in addons_inapps_dates[addon][inapp]:
            continue

        doc_id = ord_word('fin%s%s%s' % (str(addon), str(inapp), str(date)))
        doc = search.get_finance_daily_inapp(payment)
        try:
            if not already_indexed(InappPayment, doc):
                InappPayment.index(doc, bulk=True, id=doc_id)
            addons_inapps_dates[addon][inapp][date] = 0
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_finance_daily_inapp.retry(args=[ids], exc=exc)
            raise
def unindex(cls, id):
    """Delete the document ``id`` of this model from its ES index.

    A document that ES reports as not found is silently ignored.
    """
    es = elasticutils.get_es()
    try:
        es.delete(cls._get_index(), cls._meta.db_table, id)
    except pyes.exceptions.NotFoundException:
        # Nothing to remove: the item was not in the index.
        return
def index_collections(ids, **kw):
    """Bulk-index the collections with the given ids, then flush.

    ids -- ids of Collection objects; the first and last are logged.
    """
    es = elasticutils.get_es()
    log.debug('Indexing collections %s-%s [%s].' %
              (ids[0], ids[-1], len(ids)))

    collections = Collection.uncached.filter(id__in=ids)
    for collection in collections.transform(attach_translations):
        document = search.extract(collection)
        Collection.index(document, bulk=True, id=collection.id)

    es.flush_bulk(forced=True)
def remap(index_cls):
    """Replace the ES mapping of ``index_cls`` with its declared mapping.

    When the index exists, the current mapping for the type is deleted
    (best effort) before the mapping from ``index_cls.get_mapping()``
    is put. Whether ES acknowledged the new mapping is printed.

    :arg index_cls: class providing ``get_index()``,
        ``get_mapping_type_name()`` and ``get_mapping()``.
    """
    # Get an instance of the elasticsearch python wrapper
    es = get_es()
    index = index_cls.get_index()
    type_name = index_cls.get_mapping_type_name()

    # Delete the mapping if it exists
    if es.indices.exists(index=index):
        try:
            es.indices.delete_mapping(index=index, doc_type=type_name)
        except Exception:
            # Best effort only. Catching Exception rather than using
            # a bare except lets KeyboardInterrupt/SystemExit through.
            pass

    # Put the mapping
    # Comment this out for letting elasticsearch generate the mapping
    result = es.indices.put_mapping(
        index=index,
        doc_type=type_name,
        body={type_name: index_cls.get_mapping()},
    )

    if not result['acknowledged']:
        print("Mapping was not acknowledged by elasticsearch")
    else:
        print("Mapping acknowledged by elasticsearch")
def setup_indexes(index=None, aliased=True):
    """Create the stats indexes if missing and put explicit mappings.

    :arg index: index name to use for every model; when falsy, each
        model's own ``_get_index()`` is used.
    :arg aliased: forwarded to ``create_es_index_if_missing``.
    """
    es = elasticutils.get_es()
    for model in CollectionCount, DownloadCount, UpdateCount:
        # Keep the ``index`` argument untouched: rebinding it inside
        # the loop would leak the first model's resolved index into
        # every later iteration.
        requested = index or model._get_index()
        target = create_es_index_if_missing(requested, aliased=aliased)

        mapping = {
            'properties': {
                'id': {'type': 'long'},
                'count': {'type': 'long'},
                'data': {
                    'dynamic': 'true',
                    'properties': {
                        'v': {'type': 'long'},
                        'k': {'type': 'string'}
                    }
                },
                'date': {'format': 'dateOptionalTime', 'type': 'date'}
            }
        }
        es.put_mapping(model._meta.db_table, mapping, target)
def index_finance_total_inapp(addons, **kw):
    """
    Bug 758071
    Aggregates financial stats from all of the contributions for in-apps.

    One document per (addon, in-app name) is indexed unless already
    indexed; any failure schedules a retry of the task and re-raises.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial in-app stats for %s apps.' %
             len(addons))

    for addon in addons:
        # All in-app names for the given addon.
        addon_payments = InappPayment.objects.filter(config__addon=addon)
        inapp_names = set(addon_payments.values_list('name', flat=True))

        for inapp_name in inapp_names:
            # All in-app payments for the given in-app name.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue

            try:
                doc_id = ord_word('totinapp' + str(addon) + inapp_name)
                doc = search.get_finance_total_inapp(qs, addon, inapp_name)
                if not already_indexed(InappPayment, doc):
                    InappPayment.index(doc, bulk=True, id=doc_id)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_inapp.retry(args=[addons], exc=exc)
                raise
def index_finance_total_inapp(addons, **kw):
    """
    Bug 758071
    Aggregates financial stats from all of the contributions for in-apps.

    One document per (addon, in-app name) is indexed unless already
    indexed; any failure schedules a retry of the task and re-raises.
    """
    es = elasticutils.get_es()
    app_count = len(addons)
    log.info('Indexing total financial in-app stats for %s apps.' % app_count)

    for addon in addons:
        # Distinct in-app names recorded for this addon.
        names = InappPayment.objects.filter(
            config__addon=addon).values_list('name', flat=True)

        for inapp_name in set(names):
            # Payments recorded under this in-app name.
            payments = InappPayment.objects.filter(
                name=inapp_name, contribution__uuid=None)
            if not payments.exists():
                continue

            try:
                key = ord_word('totinapp' + str(addon) + inapp_name)
                data = search.get_finance_total_inapp(
                    payments, addon, inapp_name)
                if not already_indexed(InappPayment, data):
                    InappPayment.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_inapp.retry(args=[addons], exc=exc)
                raise
def index_installed_daily(ids, **kw):
    """
    Takes a list of Installed ids and uses its addon and date fields to index
    stats for that day.

    ids -- ids of mkt.webapps.Installed objects

    Each (addon, day) pair is processed once; any failure schedules a
    retry of the task and re-raises.
    """
    from mkt.webapps.models import Installed
    es = elasticutils.get_es()

    # Get Installed's
    qs = (Installed.objects
          .filter(id__in=set(ids))
          .order_by('-created')
          .values('addon', 'created'))
    log.info('[%s] Indexing %s installed counts for daily stats.' %
             (qs[0]['created'], len(qs)))

    # addon -> date -> marker for "already handled in this run".
    seen = defaultdict(lambda: defaultdict(dict))
    for row in qs:
        addon = row['addon']
        day = row['created'].strftime('%Y%m%d')

        try:
            if day not in seen[addon]:
                key = ord_word('ins' + str(addon) + str(day))
                data = search.get_installed_daily(row)
                if not already_indexed(Installed, data):
                    Installed.index(data, bulk=True, id=key)
                seen[addon][day] = 0
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_installed_daily.retry(args=[ids], exc=exc)
            raise
def index_all_profiles():
    """Recreate the default profile index and queue indexing tasks.

    Profiles with an empty ``full_name`` are excluded; the remaining
    ids are sorted and dispatched in chunks of 150 as a ``TaskSet``.
    """
    # Get an es object, delete index and re-create it
    index = settings.ES_INDEXES['default']
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    try:
        es.delete_index_if_exists(index)
    except pyes.exceptions.IndexMissingException:
        pass

    table = UserProfile._meta.db_table
    mappings = {'mappings': {table: UserProfile.get_mapping()}}
    es.create_index(index, settings=mappings)

    named = UserProfile.objects.exclude(full_name='')
    ids = named.values_list('id', flat=True)

    ts = []
    for chunk in chunked(sorted(ids), 150):
        ts.append(tasks.index_objects.subtask(args=[UserProfile, chunk]))
    TaskSet(ts).apply_async()
def index_all_profiles():
    """Recreate both profile indexes and queue chunked indexing tasks.

    The 'default' index receives every complete profile; the 'public'
    index only those that are public-indexable with PUBLIC privacy.
    Ids are sorted and dispatched in chunks of 150 as a ``TaskSet``.
    """
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    table = UserProfile._meta.db_table
    mappings = {'mappings': {table: UserProfile.get_mapping()}}

    for key in ('default', 'public'):
        name = settings.ES_INDEXES[key]
        try:
            es.delete_index_if_exists(name)
        except pyes.exceptions.IndexMissingException:
            pass
        es.create_index(name, settings=mappings)

    ts = []

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    for chunk in chunked(sorted(ids), 150):
        ts.append(index_objects.subtask(args=[UserProfile, chunk, False]))

    # public index
    public_profiles = (UserProfile.objects.complete().public_indexable()
                       .privacy_level(PUBLIC))
    ids = public_profiles.values_list('id', flat=True)
    for chunk in chunked(sorted(ids), 150):
        ts.append(index_objects.subtask(args=[UserProfile, chunk, True]))

    TaskSet(ts).apply_async()
def index_finance_total_by_currency(addons, **kw):
    """
    Bug 757581
    Total finance stats, currency breakdown.

    For every add-on with contributions, one document per distinct
    currency is indexed (unless already indexed) and the bulk queue is
    flushed. Any failure schedules a retry of the task and re-raises.
    """
    es = elasticutils.get_es()
    app_count = len(addons)
    log.info(
        'Indexing total financial stats by currency for %s apps.' % app_count)

    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue

        # Get list of distinct currencies.
        distinct = set(qs.values_list('currency', flat=True))

        for currency in distinct:
            try:
                key = ord_word('cur' + str(addon) + currency.lower())
                data = search.get_finance_total(qs, addon, 'currency',
                                                currency=currency)
                if not already_indexed(Contribution, data):
                    Contribution.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_by_currency.retry(args=[addons], exc=exc)
                raise
def recreate_indexes(es=None, indexes=None):
    """Deletes indexes and recreates them.

    :arg es: An ES object to use. Defaults to calling `get_es()`.
    :arg indexes: A list of indexes to recreate. Defaults to all
        write indexes.
    """
    if es is None:
        es = get_es()
    if indexes is None:
        indexes = all_write_indexes()

    for name in indexes:
        delete_index(name)

        # There should be no mapping-conflict race here since the
        # index doesn't exist. Live indexing should just fail.

        # Mappings, analyzers and tokenizers all go in with the create
        # call, so live indexing cannot index anything in between and
        # have ES infer a bogus mapping that conflicts with the
        # explicit one.
        body = {
            'mappings': get_mappings(name),
            'settings': {
                'analysis': get_analysis(),
            },
        }
        es.indices.create(index=name, body=body)

    # Wait until the index is there.
    es.cluster.health(wait_for_status='yellow')
def test_remove_index(self):
    """Reindexing succeeds when a plain index sits where an alias should."""
    # Putting a test_amo index in the way.
    es = elasticutils.get_es()

    for index in es.get_indices().keys():
        for prefix in ('test_amo', 'test_amo_stats'):
            aliased = index.startswith(prefix + '-')
            if aliased:
                # Replace the alias with a real index of the same name.
                es.delete_alias(prefix, [index])
                es.delete_index(index)
                es.create_index(prefix)

    # reindexing the first app
    self.webapp.save()
    self.refresh()

    # now doing a reindexation in a background process
    settings_flag = '--settings=%s' % self.settings
    args = [sys.executable, 'manage.py', 'reindex', '--prefix=test_',
            settings_flag]

    indexer = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=settings.ROOT,
    )
    stdout, stderr = indexer.communicate()
    self.assertTrue('Reindexation done' in stdout, stdout + '\n' + stderr)
def unindex_objects(mapping_type, ids, public_index, **kwargs):
    """Unindex each id through ``mapping_type.unindex``.

    Does nothing when the ``ES_DISABLED`` setting is truthy.

    :arg mapping_type: object whose ``unindex`` is called per id.
    :arg ids: iterable of ids to unindex.
    :arg public_index: forwarded to ``unindex`` as ``public_index``.
    """
    es_disabled = getattr(settings, 'ES_DISABLED', False)
    if not es_disabled:
        es = get_es()
        for doc_id in ids:
            mapping_type.unindex(doc_id, es=es, public_index=public_index)
def recreate_index(es=None):
    """Delete index if it's there and creates a new one.

    :arg es: ES to use. By default, this creates a new indexing ES.

    """
    if es is None:
        es = get_es()

    # Collect the explicit mapping of every mapping type that has one.
    mappings = {}
    for name, mapping_type in get_mapping_types().items():
        properties = mapping_type.get_mapping()
        if properties is None:
            continue
        mappings[name] = {'properties': properties}

    index = get_index()
    delete_index_if_exists(index)

    # There should be no mapping-conflict race here since the index
    # doesn't exist. Live indexing should just fail.

    # The index and the mappings are created in one call, so live
    # indexing cannot index anything between the two and cause ES to
    # infer a mapping that is incompatible with the explicit one.
    es.create_index(index, settings={'mappings': mappings})
def test_get_es_defaults(self):
    """Test that the ES has the correct defaults."""
    es = get_es()

    eq_(es.timeout, settings.ES_TIMEOUT)

    # dump_curl defaults to False, but pyes.es.ES stores None on its
    # dump_curl attribute whenever the given value is falsey.
    eq_(es.dump_curl, None)

    expected_indexes = [settings.ES_INDEXES['default']]
    eq_(es.default_indexes, expected_indexes)
def get_es(cls):
    """Return the ElasticSearch object to use.

    This is an extension point: override it if you need special
    functionality.

    :returns: a pyelasticsearch `ElasticSearch` instance

    """
    es = get_es()
    return es
def user(**kwargs):
    """Create and return a ``User``, filling in random defaults.

    ``username``, ``email``, ``first_name`` and ``last_name`` get
    random 15-letter values when absent. The ``vouched`` and ``photo``
    keys are not passed to ``User``; they are applied to the profile.
    Unless ES is disabled, the default index is refreshed afterwards.
    """
    def _random_word():
        return ''.join(random.choice(letters) for _ in xrange(15))

    if 'username' not in kwargs:
        kwargs['username'] = _random_word()
    if 'email' not in kwargs:
        kwargs['email'] = _random_word() + '@example.com'
    if 'first_name' not in kwargs:
        kwargs['first_name'] = _random_word()
    if 'last_name' not in kwargs:
        kwargs['last_name'] = _random_word()

    # Pull the profile-only options out before creating the User.
    profile_changes = {}
    for option in ('vouched', 'photo'):
        if option in kwargs:
            profile_changes[option] = kwargs.pop(option)

    new_user = User.objects.create(**kwargs)
    new_user.save()

    if profile_changes:
        profile = new_user.get_profile()

        if profile_changes.get('vouched'):
            profile.is_vouched = True

        if profile_changes.get('photo'):
            photo_path = os.path.join(os.path.dirname(__file__),
                                      'profile-photo.jpg')
            with open(photo_path) as f:
                profile.photo = File(f)
                profile.save()  # Must save inside with block

        profile.save()

    if not settings.ES_DISABLED:
        get_es().refresh(settings.ES_INDEXES['default'], timesleep=0)

    return new_user
def index(cls, document, id_=None, bulk=False, force_insert=False, es=None):
    """Adds or updates a document to the index

    :arg document: Python dict of key/value pairs representing
        the document

        .. Note::

           This must be serializable into JSON.

    :arg id_: the Django ORM model instance id---this is used to
        convert an ES search result back to the Django ORM model
        instance from the db. It should be an integer.

        .. Note::

           If you don't provide an ``id_``, then ElasticSearch
           will make up an id for your document and it'll look
           like a character name from a Lovecraft novel.

    :arg bulk: Whether or not this is part of a bulk indexing. If
        this is, you must provide an ES with the `es` argument,
        too.
    :arg force_insert: TODO
    :arg es: The ES to use. If you don't specify an ES, it'll
        use `elasticutils.contrib.django.get_es()`.

    :raises ValueError: if `bulk` is True, but `es` is None.

    .. Note::

       After you add things to the index, make sure to refresh the
       index by calling ``refresh_index()``---it doesn't happen
       automatically.

    TODO: add example.

    """
    if es is None:
        # Bulk indexing needs a caller-supplied ES.
        if bulk:
            raise ValueError('bulk is True, but es is None')
        es = get_es()

    index_name = cls.get_index()
    doc_type = cls.get_mapping_type_name()
    es.index(document, index=index_name, doc_type=doc_type, id=id_,
             bulk=bulk, force_insert=force_insert)