def index_custom_indexes(index_name=None, clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE, quiet=False):
    """
    Reindex custom Elasticsearch indexes, optionally restricted to a single one.

    Keyword Arguments:
    index_name -- if supplied, only the custom index with this name is reindexed;
        otherwise every entry in settings.ELASTICSEARCH_CUSTOM_INDEXES is processed
    clear_index -- set to True to remove all documents from the index before reindexing
    batch_size -- the number of records to index as a group; the larger the number,
        the more memory required
    quiet -- silences the status bar output during certain operations,
        use in celery operations for example
    """
    if index_name is not None:
        # A specific index was requested -- reindex only that one.
        get_index(index_name).reindex(clear_index=clear_index, batch_size=batch_size, quiet=quiet)
        return

    # No name given: walk every registered custom index.
    for index_config in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        index_class = import_class_from_string(index_config["module"])
        index_class(index_config["name"]).reindex(clear_index=clear_index, batch_size=batch_size, quiet=quiet)
def index_resources_by_type(resource_types, clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resources of a given type(s)

    Arguments:
    resource_types -- array of graph ids that represent resource types

    Keyword Arguments:
    clear_index -- set to True to remove all the resources of the types passed in
        from the index before the reindexing operation
    index_name -- only applies to custom indexes and if given will try and
        just refresh the data in that index
    batch_size -- the number of records to index as a group; the larger the number,
        the more memory required

    Returns:
    'Passed' when the database count matches the indexed count for the last
    type processed, 'Failed' otherwise ('' when resource_types is empty).
    """
    status = ''
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # Map every nodeid to its datatype once, up front, so each resource's
    # document build doesn't hit the database per node.
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}

    for resource_type in resource_types:
        start = datetime.now()
        resources = Resource.objects.filter(graph_id=str(resource_type))
        graph_name = models.GraphModel.objects.get(graphid=str(resource_type)).name
        print("Indexing resource type '{0}'".format(graph_name))

        if index_name is None:
            # Reindex the core 'resources' and 'terms' indexes for this graph.
            q = Query(se=se)
            term = Term(field='graph_id', term=str(resource_type))
            q.add_query(term)
            if clear_index:
                q.delete(index='resources', refresh=True)

            with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer:
                with se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
                    for resource in resources:
                        document, terms = resource.get_documents_to_index(fetchTiles=True, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
                        doc_indexer.add(index='resources', id=document['resourceinstanceid'], data=document)
                        for term in terms:
                            term_indexer.add(index='terms', id=term['_id'], data=term['_source'])

            # Compare what is in the database with what made it into the index.
            result_summary = {'database': len(resources), 'indexed': se.count(index='resources', body=q.dsl)}
            status = 'Passed' if result_summary['database'] == result_summary['indexed'] else 'Failed'
            print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(
                status, graph_name, result_summary['database'], result_summary['indexed'], (datetime.now() - start).seconds))

            # Keep every registered custom index in sync with the core index.
            for index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
                es_index = import_class_from_string(index['module'])(index['name'])
                es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)
        else:
            # Only refresh the single named custom index.
            es_index = get_index(index_name)
            es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

    return status
def remove_index(self, name):
    """Delete the custom index registered under the given name."""
    # Resolve the index instance by name, then drop it from Elasticsearch.
    get_index(name).delete_index()
def register_index(self, name):
    """Create/prepare the custom index registered under the given name."""
    # Resolve the index instance by name, then set it up in Elasticsearch.
    get_index(name).prepare_index()