Esempio n. 1
0
def index_custom_indexes(index_name=None,
                         clear_index=True,
                         batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                         quiet=False):
    """
    Reindexes the custom indexes, either all of them or a single named one

    Keyword Arguments:
    index_name -- if supplied, only the custom index with this name is reindexed; otherwise every entry in settings.ELASTICSEARCH_CUSTOM_INDEXES is reindexed
    clear_index -- passed through unchanged to each index's reindex() call
    batch_size -- the number of records to index as a group, passed through to reindex()
    quiet -- passed through to reindex(); intended to silence status output (e.g. when run from celery)

    """

    # The same options are forwarded to every reindex() call.
    reindex_options = {
        "clear_index": clear_index,
        "batch_size": batch_size,
        "quiet": quiet,
    }

    # A specific index was requested: reindex only that one.
    if index_name is not None:
        get_index(index_name).reindex(**reindex_options)
        return

    # No name given: instantiate and reindex each configured custom index in turn.
    for index_config in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        index_class = import_class_from_string(index_config["module"])
        index_class(index_config["name"]).reindex(**reindex_options)
Esempio n. 2
0
def index_resources_by_type(resource_types, clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resources of a given type(s)

    Arguments:
    resource_types -- array of graph ids that represent resource types

    Keyword Arguments:
    clear_index -- set to True to remove all the resources of the types passed in from the index before the reindexing operation
    index_name -- only applies to custom indexes and if given will try and just refresh the data in that index
    batch_size -- the number of records to index as a group, the larger the number the more memory required

    Returns:
    'Passed' or 'Failed' for the LAST resource type that was indexed into the
    'resources' index (database count compared with indexed count), or '' when
    no such check ran (empty resource_types, or index_name was supplied)

    """

    status = ''
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}

    for resource_type in resource_types:
        start = datetime.now()
        resources = Resource.objects.filter(graph_id=str(resource_type))
        graph_name = models.GraphModel.objects.get(graphid=str(resource_type)).name
        print("Indexing resource type '{0}'".format(graph_name))

        if index_name is None:
            # Query matching every document of this graph; it is used both to
            # clear the 'resources' index and to count what ended up indexed.
            q = Query(se=se)
            graph_term = Term(field='graph_id', term=str(resource_type))
            q.add_query(graph_term)
            if clear_index:
                q.delete(index='resources', refresh=True)

            # One bulk indexer for resource documents, one for their terms.
            with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer, \
                    se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
                for resource in resources:
                    document, terms = resource.get_documents_to_index(fetchTiles=True, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
                    doc_indexer.add(index='resources', id=document['resourceinstanceid'], data=document)
                    for term_doc in terms:
                        term_indexer.add(index='terms', id=term_doc['_id'], data=term_doc['_source'])

            result_summary = {'database': len(resources), 'indexed': se.count(index='resources', body=q.dsl)}
            status = 'Passed' if result_summary['database'] == result_summary['indexed'] else 'Failed'
            # total_seconds() covers the whole duration; timedelta.seconds alone
            # drops whole days and would wrap for runs longer than 24 hours.
            elapsed_seconds = int((datetime.now() - start).total_seconds())
            print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(status, graph_name, result_summary['database'], result_summary['indexed'], elapsed_seconds))

            # Also push the same resources into every configured custom index.
            for index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
                es_index = import_class_from_string(index['module'])(index['name'])
                es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

        else:
            # Only the named custom index is refreshed; the main 'resources'
            # index is left untouched and no status is computed.
            es_index = get_index(index_name)
            es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

    return status
Esempio n. 3
0
 def remove_index(self, name):
     """
     Looks up the index with the given name via get_index and calls delete_index() on it

     Arguments:
     name -- the name handed to get_index to find the index object

     """
     get_index(name).delete_index()
Esempio n. 4
0
 def register_index(self, name):
     """
     Looks up the index with the given name via get_index and calls prepare_index() on it

     Arguments:
     name -- the name handed to get_index to find the index object

     """
     get_index(name).prepare_index()