def index_objects(mapping, queryset, index, print_progress=False):
    """
    Index synchronously model specified mapping type with an optimized query.

    Documents are extracted per instance and sent to Elasticsearch in
    batches of 100 to limit memory use and request size.

    Arguments:
        mapping: mapping type whose ``extract_document``/``bulk_index`` are used.
        queryset: queryset of model instances to index.
        index: base index name; combined with the mapping via get_index_name.
        print_progress (bool): when True, queryset_iterator reports progress.
    """
    documents = []
    for instance in queryset_iterator(mapping, queryset, print_progress=print_progress):
        documents.append(mapping.extract_document(instance.id, instance))
        if len(documents) >= 100:
            mapping.bulk_index(documents, id_field='id', index=get_index_name(index, mapping), es=es)
            documents = []
    # Flush the final partial batch. Skip the call entirely when nothing is
    # left (empty queryset, or a count that is an exact multiple of 100) —
    # the original code issued a useless bulk_index with an empty list.
    if documents:
        mapping.bulk_index(documents, id_field='id', index=get_index_name(index, mapping), es=es)
def update_in_index(instance, mapping):
    """
    Utility function for signal listeners index to Elasticsearch.

    Currently uses synchronous tasks. And because of that all exceptions
    are caught, so failures will not interfere with the regular model
    updates.

    Arguments:
        instance: model instance to (re)index.
        mapping: mapping type for the instance's model.
    """
    if settings.ES_DISABLED:
        return

    if hasattr(instance, 'is_deleted') and instance.is_deleted:
        # Soft-deleted instances are removed from the index instead of updated.
        remove_from_index(instance, mapping)
    else:
        logger.info(u'Updating instance %s: %s' % (instance.__class__.__name__, instance.pk))
        try:
            main_index_with_type = get_index_name(main_index, mapping)
            try:
                document = mapping.extract_document(instance.id, instance)
            except Exception as exc:
                logger.exception('Unable to extract document {0}: {1}'.format(
                    instance, repr(exc)))
            else:
                # Index object direct instead of bulk_index, to prevent multiple reads from db
                mapping.index(document, id_=instance.id, es=es, index=main_index_with_type)
                es.indices.refresh(main_index_with_type)
        except Exception:
            # Catch-all: indexing must never break the surrounding model save.
            # NOTE: format_exc takes a `limit` argument, not an exception —
            # the previous `format_exc(e)` call was incorrect API usage.
            logger.error(traceback.format_exc())
def unindex_objects(mapping, queryset, index, print_progress=False):
    """
    Synchronously remove documents of the specified mapping type, using an
    optimized query.

    Only primary keys are fetched from the database, since that is all the
    unindex call needs.
    """
    queryset = queryset.only('pk')
    for obj in queryset_iterator(mapping, queryset, print_progress=print_progress):
        index_name = get_index_name(index, mapping)
        try:
            mapping.unindex(obj.pk, index=index_name, es=es)
        except NotFoundError:
            # The document was never indexed (or is already gone) — nothing to do.
            pass
def remove_from_index(instance, mapping):
    """
    Utility function for signal listeners to remove from Elasticsearch.

    Currently uses synchronous tasks. And because of that all exceptions
    are caught, so failures will not interfere with the regular model
    updates.

    Arguments:
        instance: model instance to remove from the index.
        mapping: mapping type for the instance's model.
    """
    if settings.ES_DISABLED:
        return

    logger.info(u'Removing instance %s: %s' % (instance.__class__.__name__, instance.pk))
    try:
        main_index_with_type = get_index_name(main_index, mapping)
        tasks.unindex_objects(mapping, [instance.id], es=es, index=main_index_with_type)
        es.indices.refresh(main_index_with_type)
    except NotFoundError:
        # Already absent from the index; log and carry on.
        # (logger.warn is a deprecated alias of logger.warning.)
        logger.warning('Not found in index instance %s: %s' % (instance.__class__.__name__, instance.pk))
def update_in_index(instance, mapping):
    """
    Utility function for signal listeners index to Elasticsearch.

    Currently uses synchronous tasks. And because of that all exceptions
    are caught, so failures will not interfere with the regular model
    updates.

    Arguments:
        instance: model instance to (re)index.
        mapping: mapping type for the instance's model.
    """
    if settings.ES_DISABLED:
        return

    if hasattr(instance, 'is_deleted') and instance.is_deleted:
        # Soft-deleted instances are removed from the index instead of updated.
        remove_from_index(instance, mapping)
    else:
        logger.info(u'Updating instance %s: %s' % (instance.__class__.__name__, instance.pk))
        try:
            main_index_with_type = get_index_name(main_index, mapping)
            tasks.index_objects(mapping, [instance.id], es=es, index=main_index_with_type)
            es.indices.refresh(main_index_with_type)
        except Exception:
            # Catch-all: indexing must never break the surrounding model save.
            # NOTE: format_exc takes a `limit` argument, not an exception —
            # the previous `format_exc(e)` call was incorrect API usage.
            logger.error(traceback.format_exc())
def do_search(self, return_fields=None):
    """
    Execute the search.

    Arguments:
        return_fields (list): strings of fieldnames to return from result

    Returns:
        hits (list): dicts with search results per item
        facets (list|None): facet terms when a facet was requested, else None
        count (int): total number of results
        took (int): milliseconds Elastic search took to get the results
    """
    if settings.ES_DISABLED:
        # Return the same 4-tuple shape as every other return path so
        # callers can always unpack (hits, facets, count, took); the old
        # 3-tuple here would break such unpacking.
        return [], None, 0, 0

    self.search = self.search.filter_raw({'and': self.raw_filters})

    if self.model_type:
        self.search = self.search.doctypes(self.model_type)
        # Also limit the search to just the index with the right type.
        # This is faster than asking every index, also prevents some
        # annoying "cannot find field" errors in the elasticsearch logs.
        index_name = get_index_name(main_index, self.model_type)
        self.search = self.search.indexes(index_name)

    if self.facet:
        facet_raw = {
            "terms": {
                "field": self.facet['field'],
                "size": self.facet['size'],
            },
        }
        if self.facet['filter']:
            # Restrict facet counts to the current tenant combined with the
            # optional user supplied query string.
            facet_filter_dict = {
                'and': [
                    {
                        'term': {
                            'tenant': self.tenant_id
                        }
                    },
                    {
                        'query': {
                            'query_string': {
                                'query': self.facet['filter']
                            }
                        }
                    }
                ]
            }
            facet_raw['facet_filter'] = facet_filter_dict
        self.search = self.search.facet_raw(items=facet_raw)

    # Fire off search.
    try:
        hits = []
        execute = self.search.execute()
        for result in execute:
            hit = {
                'id': result.id,
            }
            if not self.model_type:
                # We will add type if not specifically searched on it.
                hit['type'] = result.es_meta.type
            for field in result:
                # Add specified fields, or all fields when not specified.
                if return_fields:
                    if field in return_fields:
                        hit[field] = result[field]
                else:
                    hit[field] = result[field]
            hits.append(hit)
        if execute.facets:
            return hits, execute.facets['items']['terms'], execute.count, execute.took
        return hits, None, execute.count, execute.took
    except RequestError as e:
        # This can happen when the query is malformed. For example:
        # A user entering special characters. This should normally be taken
        # care of where the request is built (usually in Javascript),
        # by escaping or omitting special characters.
        # This may be hard to get fool proof, therefore we also
        # catch the exception here to prevent server errors.
        logger.error('request error %s' % e)
        return [], None, 0, 0
def index(self):
    """
    Do the actual indexing for all specified targets.

    For every mapping in self.target_list a brand new timestamped index is
    created and filled; the aliases are then switched over to it in a single
    update_aliases call before the previous index is deleted. Raises when a
    leftover unaliased index is found and self.force is not set.
    """
    for mapping in self.target_list:
        model_name = mapping.get_mapping_type_name()
        main_index_base = settings.ES_INDEXES['default']
        main_index = get_index_name(main_index_base, mapping)

        self.stdout.write('==> %s' % model_name)

        # Check if we currently have an index for this mapping.
        # The alias points at the real (timestamped) index; remember it so
        # it can be unaliased and deleted after the switch.
        old_index = None
        aliases = self.es.indices.get_aliases(name=main_index)
        for key, value in aliases.iteritems():
            if value['aliases']:
                old_index = key
                self.stdout.write('Current index "%s"' % key)

        # Check any indices with no alias (leftovers from failed indexing).
        # Or it could be that it is still in progress,
        aliases = self.es.indices.get_aliases()
        for key, value in aliases.iteritems():
            if not key.endswith(model_name):
                # Not the model we are looking after.
                continue
            if key == main_index:
                # This is an auto created index. Will be removed at end of command.
                continue
            if not value['aliases']:
                if self.force:
                    self.stdout.write('Removing leftover "%s"' % key)
                    self.es.indices.delete(key)
                else:
                    raise Exception('Found leftover %s, proceed with -f to remove.'
                                    ' Make sure indexing this model is not already running!' % key)

        # Create new index.
        index_settings = {
            'mappings': {
                model_name: mapping.get_mapping()
            },
            'settings': {
                'analysis': get_analyzers()['analysis'],
                'number_of_shards': 1,
            }
        }
        # Timestamp-based base name keeps every rebuild's index unique.
        temp_index_base = 'index_%s' % (int(time.time()))
        temp_index = get_index_name(temp_index_base, mapping)
        self.stdout.write('Creating new index "%s"' % temp_index)
        self.es.indices.create(temp_index, body=index_settings)

        # Index documents.
        self.index_documents(mapping, temp_index_base)

        # Switch aliases.
        if old_index:
            # Remove and add in one atomic update_aliases call, then drop
            # the now-unreferenced old index.
            self.es.indices.update_aliases({
                'actions': [
                    {'remove': {'index': old_index, 'alias': main_index}},
                    {'remove': {'index': old_index, 'alias': main_index_base}},
                    {'add': {'index': temp_index, 'alias': main_index}},
                    {'add': {'index': temp_index, 'alias': main_index_base}},
                ]
            })
            self.stdout.write('Removing previous index "%s"' % old_index)
            self.es.indices.delete(old_index)
        else:
            if self.es.indices.exists(main_index):
                # This is a corner case. There was no alias named index_name, but
                # an index index_name nevertheless exists, this only happens when the index
                # was already created (because of ES auto creation features).
                self.stdout.write('Removing previous (presumably auto created) index "%s"' % main_index)
                self.es.indices.delete(main_index)
            self.es.indices.update_aliases({
                'actions': [
                    {'add': {'index': temp_index, 'alias': main_index}},
                    {'add': {'index': temp_index, 'alias': main_index_base}},
                ]
            })

    self.stdout.write('')
    self.stdout.write('Indexing finished.')
def do_search(self, return_fields=None):
    """
    Execute the search.

    Arguments:
        return_fields (list): strings of fieldnames to return from result

    Returns:
        hits (list): dicts with search results per item
        facets (list|None): facet terms when a facet was requested, else None
        count (int): total number of results
        took (int): milliseconds Elastic search took to get the results
    """
    if settings.ES_DISABLED:
        # Return the same 4-tuple shape as every other return path so
        # callers can always unpack (hits, facets, count, took); the old
        # 3-tuple here would break such unpacking.
        return [], None, 0, 0

    self.search = self.search.filter_raw({'and': self.raw_filters})

    if self.model_type:
        self.search = self.search.doctypes(self.model_type)
        # Also limit the search to just the index with the right type.
        # This is faster than asking every index, also prevents some
        # annoying "cannot find field" errors in the elasticsearch logs.
        index_name = get_index_name(main_index, self.model_type)
        self.search = self.search.indexes(index_name)

    if self.facet:
        facet_raw = {
            'terms': {
                'field': self.facet['field'],
                'size': self.facet['size'],
            },
        }
        # Always restrict facet counts to the current tenant; optional extra
        # query-string filters are ANDed in below.
        facet_filter_dict = {
            'and': [{
                'term': {
                    'tenant': self.tenant_id,
                }
            }]
        }
        if self.facet['filters']:
            for facet_filter in self.facet['filters']:
                facet_filter_dict['and'].append(
                    {'query': {
                        'query_string': {
                            'query': facet_filter
                        }
                    }})
        facet_raw['facet_filter'] = facet_filter_dict
        self.search = self.search.facet_raw(items=facet_raw)

    # Fire off search.
    try:
        hits = []
        execute = self.search.execute()
        for result in execute:
            hit = {
                'id': result.id,
            }
            if not self.model_type:
                # We will add type if not specifically searched on it.
                hit['type'] = result.es_meta.type
            for field in result:
                # Add specified fields, or all fields when not specified.
                if return_fields:
                    if field in return_fields:
                        hit[field] = result[field]
                else:
                    hit[field] = result[field]
            hits.append(hit)
        if execute.facets:
            facets = execute.facets['items']['terms']
            if self.model_type == 'tags_tag':
                # Enrich tag facets with the most recent usage date found
                # in the matching hits.
                for hit in hits:
                    # Get the object with the given name.
                    facet = next(
                        (x for x in facets if x.get('term') == hit.get('name_flat')),
                        None)
                    if facet and (not facet.get('last_used') or
                                  hit.get('last_used') > facet.get('last_used')):
                        # Set the latest usage date.
                        facet.update({'last_used': hit.get('last_used')})
            return hits, facets, execute.count, execute.took
        return hits, None, execute.count, execute.took
    except RequestError as e:
        # This can happen when the query is malformed. For example:
        # A user entering special characters. This should normally be taken
        # care of where the request is built (usually in Javascript),
        # by escaping or omitting special characters.
        # This may be hard to get fool proof, therefore we also
        # catch the exception here to prevent server errors.
        logger.error('request error %s' % e)
        return [], None, 0, 0
def handle(self, *args, **options):
    """
    Entry point for the index management command.

    Validates the requested targets, then for every (matching) model
    mapping builds a brand new timestamped index, fills it, and switches
    the aliases over to it atomically before deleting the old index.
    """
    es = get_es_client()

    if args:
        self.stdout.write('Aborting, unexpected arguments %s' % list(args))
        return

    if options['list']:
        self.stdout.write('Possible models to index:\n')
        for mapping in ModelMappings.get_model_mappings().values():
            self.stdout.write(mapping.get_mapping_type_name())
        return

    target = options['target']
    if target:
        targets = target.split(',')
    else:
        targets = []  # (meaning all)
    has_targets = targets != []

    self.stdout.write('Please remember that HelloLily needs to be in maintenance mode. \n\n')

    if has_targets:
        # Do a quick run to check if all targets are valid models.
        check_targets = list(targets)  # make a copy
        # BUGFIX: iterate over `targets`, not over `check_targets` itself.
        # Removing elements from the list being iterated skips the element
        # that shifts into the removed slot, so a valid target could be
        # left behind and falsely abort the command.
        for target in targets:
            for mapping in ModelMappings.get_model_mappings().values():
                if self.model_targetted(mapping, [target]):
                    check_targets.remove(target)
                    break
        if check_targets:
            self.stdout.write('Aborting, following targets not recognized: %s' % check_targets)
            return

    for mapping in ModelMappings.get_model_mappings().values():
        model_name = mapping.get_mapping_type_name()
        main_index_base = settings.ES_INDEXES['default']
        main_index = get_index_name(main_index_base, mapping)

        # Skip this model if there are specific targets and not specified.
        if has_targets and not self.model_targetted(mapping, targets):
            continue

        self.stdout.write('==> %s' % model_name)

        # Check if we currently have an index for this mapping; the alias
        # points at the real (timestamped) index.
        old_index = None
        aliases = es.indices.get_aliases(name=main_index)
        for key, value in aliases.iteritems():
            if value['aliases']:
                old_index = key
                self.stdout.write('Current index "%s"' % key)

        # Check any indices with no alias (leftovers from failed indexing).
        # Or it could be that it is still in progress,
        aliases = es.indices.get_aliases()
        for key, value in aliases.iteritems():
            if not key.endswith(model_name):
                # Not the model we are looking after.
                continue
            if key == main_index:
                # This is an auto created index. Will be removed at end of command.
                continue
            if not value['aliases']:
                if options['force']:
                    self.stdout.write('Removing leftover "%s"' % key)
                    es.indices.delete(key)
                else:
                    raise Exception('Found leftover %s, proceed with -f to remove.'
                                    ' Make sure indexing this model is not already running!' % key)

        # Create new index.
        index_settings = {
            'mappings': {
                model_name: mapping.get_mapping()
            },
            'settings': {
                'analysis': get_analyzers()['analysis'],
                'number_of_shards': 1,
            }
        }
        # Timestamp-based base name keeps every rebuild's index unique.
        temp_index_base = 'index_%s' % (int(time.time()))
        temp_index = get_index_name(temp_index_base, mapping)
        self.stdout.write('Creating new index "%s"' % temp_index)
        es.indices.create(temp_index, body=index_settings)

        # Index documents.
        self.index_documents(mapping, temp_index_base)

        # Switch aliases.
        if old_index:
            es.indices.update_aliases({
                'actions': [
                    {'remove': {'index': old_index, 'alias': main_index}},
                    {'remove': {'index': old_index, 'alias': main_index_base}},
                    {'add': {'index': temp_index, 'alias': main_index}},
                    {'add': {'index': temp_index, 'alias': main_index_base}},
                ]
            })
            self.stdout.write('Removing previous index "%s"' % old_index)
            es.indices.delete(old_index)
        else:
            if es.indices.exists(main_index):
                # This is a corner case. There was no alias named index_name, but
                # an index index_name nevertheless exists, this only happens when the index
                # was already created (because of ES auto creation features).
                self.stdout.write('Removing previous (presumably auto created) index "%s"' % main_index)
                es.indices.delete(main_index)
            es.indices.update_aliases({
                'actions': [
                    {'add': {'index': temp_index, 'alias': main_index}},
                    {'add': {'index': temp_index, 'alias': main_index_base}},
                ]
            })

    self.stdout.write('')
    self.stdout.write('Indexing finished.')

    if options['queries']:
        from django.db import connection
        for query in connection.queries:
            print(query)