def index(self):
    """
    Indexes all the necessary item values of a resource to support search

    A resource whose graph id equals settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID
    is not indexed at all.  For every other resource the search document is
    written to the resources index, each of its terms is written to the terms
    index, and every index listed in settings.ELASTICSEARCH_CUSTOM_INDEXES is
    asked to build and store its own document from this resource and its tiles.

    """

    # guard clause: nothing to do for the system settings resource
    if str(self.graph_id) == str(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID):
        return

    factory = DataTypeFactory()

    # map of stringified node id -> datatype name for every node
    datatypes_by_node = {}
    for nodeid, datatype in models.Node.objects.values_list("nodeid", "datatype"):
        datatypes_by_node[str(nodeid)] = datatype

    document, terms = self.get_documents_to_index(
        datatype_factory=factory,
        node_datatypes=datatypes_by_node,
    )
    document["root_ontology_class"] = self.get_root_ontology()

    serialized = JSONSerializer().serializeToPython(document)
    se.index_data(index=RESOURCES_INDEX, body=serialized, id=self.pk)

    for term in terms:
        se.index_data("terms", body=term["_source"], id=term["_id"])

    # custom indexes receive the un-serialized tiles of the document
    for custom in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        index_class = import_class_from_string(custom["module"])
        custom_index = index_class(custom["name"])
        custom_doc, custom_doc_id = custom_index.get_documents_to_index(self, document["tiles"])
        custom_index.index_document(document=custom_doc, id=custom_doc_id)
def index_custom_indexes(index_name=None, clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE, quiet=False):
    """
    Reindexes the custom indexes, either all of them or a single named one

    Keyword Arguments:
    index_name -- if supplied, only the custom index with this name is reindexed
    clear_index -- passed through to each index's reindex call
    batch_size -- the number of records to index as a group, the larger the number the more memory required
    quiet -- silences the status bar output during certain operations, use in celery operations for example

    """

    reindex_options = {"clear_index": clear_index, "batch_size": batch_size, "quiet": quiet}

    # a specific index was requested: look it up by name and stop there
    if index_name is not None:
        get_index(index_name).reindex(**reindex_options)
        return

    # otherwise instantiate and reindex every configured custom index in turn
    for index_config in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        index_class = import_class_from_string(index_config["module"])
        index_class(index_config["name"]).reindex(**reindex_options)
def index_resources_by_type(resource_types, clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resources of a given type(s)

    Arguments:
    resource_types -- array of graph ids that represent resource types

    Keyword Arguments:
    clear_index -- set to True to remove all the resources of the types passed in from the index before the reindexing operation
    index_name -- only applies to custom indexes and if given will try and just refresh the data in that index
    batch_size -- the number of records to index as a group, the larger the number the more memory required

    Returns:
    'Passed' or 'Failed' for the last resource type that was indexed into the
    resources/terms indexes (i.e. processed with index_name=None), or an empty
    string if no resource type went through that path.

    """

    status = ''
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}

    for resource_type in resource_types:
        start = datetime.now()
        resources = Resource.objects.filter(graph_id=str(resource_type))
        graph_name = models.GraphModel.objects.get(graphid=str(resource_type)).name
        print("Indexing resource type '{0}'".format(graph_name))

        if index_name is None:
            # query matching every indexed document of this resource type; used
            # both to clear the index and to count what ended up indexed
            q = Query(se=se)
            term = Term(field='graph_id', term=str(resource_type))
            q.add_query(term)
            if clear_index:
                q.delete(index='resources', refresh=True)

            with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer:
                with se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
                    for resource in resources:
                        document, terms = resource.get_documents_to_index(
                            fetchTiles=True,
                            datatype_factory=datatype_factory,
                            node_datatypes=node_datatypes,
                        )
                        doc_indexer.add(index='resources', id=document['resourceinstanceid'], data=document)
                        for term in terms:
                            term_indexer.add(index='terms', id=term['_id'], data=term['_source'])

            # compare the number of resources in the database with the number
            # of documents now in the index for this resource type
            result_summary = {'database': len(resources), 'indexed': se.count(index='resources', body=q.dsl)}
            status = 'Passed' if result_summary['database'] == result_summary['indexed'] else 'Failed'

            # total_seconds() is used because timedelta.seconds discards whole days
            elapsed_seconds = int((datetime.now() - start).total_seconds())
            print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(
                status, graph_name, result_summary['database'], result_summary['indexed'], elapsed_seconds))

            for index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
                es_index = import_class_from_string(index['module'])(index['name'])
                es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

        else:
            # only the named custom index is refreshed for this resource type
            es_index = get_index(index_name)
            es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

    return status
def delete_index(self, resourceinstanceid=None):
    """
    Deletes all references to a resource from all indexes

    Keyword Arguments:
    resourceinstanceid -- the resource instance id to delete from related indexes, if supplied will use this over self.resourceinstanceid

    Note that the custom indexes are always asked to delete `self`, whatever
    id was passed in.

    """

    target_id = str(self.resourceinstanceid if resourceinstanceid is None else resourceinstanceid)

    # 1. remove the terms that belong to the resource
    terms_filter = Bool()
    terms_filter.filter(Terms(field="resourceinstanceid", terms=[target_id]))
    terms_query = Query(se)
    terms_query.add_query(terms_filter)
    terms_query.delete(index=TERMS_INDEX)

    # 2. remove relations that point to or from the resource
    relations_filter = Bool()
    relations_filter.should(Terms(field="resourceinstanceidto", terms=[target_id]))
    relations_filter.should(Terms(field="resourceinstanceidfrom", terms=[target_id]))
    relations_query = Query(se)
    relations_query.add_query(relations_filter)
    relations_query.delete(index=RESOURCE_RELATIONS_INDEX)

    # 3. reindex every resource whose nested "ids" entries mention the resource
    referencing_filter = Bool()
    referencing_filter.filter(Nested(path="ids", query=Terms(field="ids.id", terms=[target_id])))
    referencing_query = Query(se)
    referencing_query.add_query(referencing_filter)
    hits = referencing_query.search(index=RESOURCES_INDEX)["hits"]["hits"]
    for hit in hits:
        try:
            related = Resource.objects.get(pk=hit["_id"])
            related.load_tiles()
            related.index()
        except ObjectDoesNotExist:
            # indexed document without a matching database row: skip it
            pass

    # 4. remove the resource's own document
    se.delete(index=RESOURCES_INDEX, id=target_id)

    # 5. remove the resource from each configured custom index
    for index_config in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        index_class = import_class_from_string(index_config["module"])
        custom_index = index_class(index_config["name"])
        custom_index.delete_resources(resources=self)
def get_index(name):
    """
    Returns an instance of the custom index configured under the given name

    Arguments:
    name -- the "name" value of an entry in settings.ELASTICSEARCH_CUSTOM_INDEXES

    Raises SearchIndexNotDefinedError if no entry carries that name.

    """

    # first configured entry whose name matches, or None when there is none
    matching_config = next(
        (config for config in settings.ELASTICSEARCH_CUSTOM_INDEXES if config["name"] == name),
        None,
    )
    if matching_config is None:
        raise SearchIndexNotDefinedError(name=name)

    index_class = import_class_from_string(matching_config["module"])
    return index_class(name)
def __init__(self, format=None, **kwargs):
    """
    Creates the writer registered for the given format

    Keyword Arguments:
    format -- key into settings.RESOURCE_FORMATERS selecting the writer class;
              also forwarded to the writer as the `format` keyword argument

    Any other keyword arguments are passed to the writer unchanged.

    """

    kwargs.update(format=format)
    writer_path = settings.RESOURCE_FORMATERS[format]
    writer_class = import_class_from_string(writer_path)
    self.writer = writer_class(**kwargs)
def __init__(self, format=None, **kwargs):
    """
    Instantiates the writer class that settings.RESOURCE_FORMATERS maps to `format`

    Keyword Arguments:
    format -- the formatter key to look up; it is also handed to the writer
              together with the remaining keyword arguments

    """

    # the writer receives every caller-supplied option plus the format itself
    writer_options = dict(kwargs, format=format)
    writer_class = import_class_from_string(settings.RESOURCE_FORMATERS[format])
    self.writer = writer_class(**writer_options)