def test_chunked(self):
        """Exercise chunked() on empty, short, exact and long input."""
        # Each case is (input list, chunk size n, expected list of chunks).
        cases = [
            # chunking nothing yields nothing
            ([], 1, []),
            # len(list) < n: a single, partially filled chunk
            ([1], 10, [(1,)]),
            # len(list) == n: exactly one full chunk
            ([1, 2], 2, [(1, 2)]),
            # len(list) > n: full chunks followed by the remainder
            ([1, 2, 3, 4, 5], 2, [(1, 2), (3, 4), (5,)]),
        ]
        for items, size, expected in cases:
            eq_(list(chunked(items, size)), expected)
    def test_chunked(self):
        """Verify chunked() for inputs shorter than, equal to and longer than n."""

        def chunks_of(seq, n):
            # Materialize the chunked() result so it compares against a list.
            return list(chunked(seq, n))

        # Empty input: no chunks are produced.
        eq_(chunks_of([], 1), [])

        # Fewer items than n: one partial chunk.
        eq_(chunks_of([1], 10), [(1,)])

        # Exactly n items: one full chunk.
        eq_(chunks_of([1, 2], 2), [(1, 2)])

        # More items than n: full chunks, then the leftover item.
        eq_(chunks_of([1, 2, 3, 4, 5], 2), [(1, 2), (3, 4), (5,)])
예제 #3
0
def index_objects(mapping_type, ids, chunk_size=100, es=None, index=None):
    """Index documents of a specified mapping type.

    This does NOT allow for asynchronous indexing; call it directly.

    If a mapping_type extends Indexable, you can add a ``post_save``
    hook for the model that it's based on like this::

        @receiver(dbsignals.post_save, sender=MyModel)
        def update_in_index(sender, instance, **kw):
            # from elasticutils.contrib.django import tasks
            index_objects(MyMappingType, [instance.id])


    :arg mapping_type: the mapping type for these ids
    :arg ids: the list of ids of things to index
    :arg chunk_size: the size of the chunk for bulk indexing

        .. Note::

           The default chunk_size is 100. The number of documents you
           can bulk index at once depends on the size of the
           documents.

    :arg es: The `Elasticsearch` to use. If you don't specify an
        `Elasticsearch`, it'll use `mapping_type.get_es()`.
    :arg index: The name of the index to use. If you don't specify one
        it'll use `mapping_type.get_index()`.

    """
    if settings.ES_DISABLED:
        return

    if not ids:
        # Nothing to report a range for; the chunk loop below is then a no-op.
        log.warning("0 objects being indexed")
    else:
        log.debug('Indexing objects {0}-{1}. [{2}]'.format(
            ids[0], ids[-1], len(ids)))

    # Get the model this mapping type is based on.
    model = mapping_type.get_model()

    # Retrieve all the objects that we're going to index and do it in
    # bulk.
    for id_list in chunked(ids, chunk_size):
        documents = []

        for obj in model.objects.filter(id__in=id_list):
            try:
                documents.append(mapping_type.extract_document(obj.id, obj))
            except Exception as exc:
                # ``Exception`` instead of ``StandardError``: the latter only
                # exists on Python 2, so on Python 3 a failing extraction
                # would surface as a NameError. A failure is logged and the
                # object skipped so the rest of the chunk is still indexed.
                log.exception('Unable to extract document {0}: {1}'.format(
                    obj, repr(exc)))

        # Only issue a bulk request when something was extracted.
        if documents:
            mapping_type.bulk_index(documents,
                                    id_field='id',
                                    es=es,
                                    index=index)
예제 #4
0
def index_objects(mapping_type, ids, chunk_size=100, es=None, index=None):
    """Index documents of a specified mapping type.

    This allows for asynchronous indexing.

    If a mapping_type extends Indexable, you can add a ``post_save``
    hook for the model that it's based on like this::

        @receiver(dbsignals.post_save, sender=MyModel)
        def update_in_index(sender, instance, **kw):
            from elasticutils.contrib.django import tasks
            tasks.index_objects.delay(MyMappingType, [instance.id])


    :arg mapping_type: the mapping type for these ids
    :arg ids: the list of ids of things to index; an empty list is a
        no-op apart from a warning in the log
    :arg chunk_size: the size of the chunk for bulk indexing

        .. Note::

           The default chunk_size is 100. The number of documents you
           can bulk index at once depends on the size of the
           documents.

    :arg es: The `Elasticsearch` to use. If you don't specify an
        `Elasticsearch`, it'll use `mapping_type.get_es()`.
    :arg index: The name of the index to use. If you don't specify one
        it'll use `mapping_type.get_index()`.

    """
    if settings.ES_DISABLED:
        return

    if not ids:
        # The debug message below reads ids[0] and ids[-1], which raises
        # IndexError on an empty sequence. There is nothing to index anyway.
        log.warning('0 objects being indexed')
        return

    log.debug('Indexing objects {0}-{1}. [{2}]'.format(
        ids[0], ids[-1], len(ids)))

    # Get the model this mapping type is based on.
    model = mapping_type.get_model()

    # Retrieve all the objects that we're going to index and do it in
    # bulk.
    for id_list in chunked(ids, chunk_size):
        documents = []

        for obj in model.objects.filter(id__in=id_list):
            try:
                documents.append(mapping_type.extract_document(obj.id, obj))
            except Exception as exc:
                # Log and skip the failing object so that the rest of the
                # chunk is still indexed.
                log.exception('Unable to extract document {0}: {1}'.format(
                    obj, repr(exc)))

        # Only issue a bulk request when something was extracted.
        if documents:
            mapping_type.bulk_index(
                documents, id_field='id', es=es, index=index)
예제 #5
0
파일: tasks.py 프로젝트: kausdev/mozillians
def index_objects(mapping_type, ids, chunk_size=100, public_index=False, **kwargs):
    """Bulk index the model objects with the given ids, one chunk at a time.

    Does nothing when ``settings.ES_DISABLED`` is set to a true value.

    :arg mapping_type: the mapping type for these ids; it supplies the
        model, the target index and the document extraction
    :arg ids: the ids of the objects to index
    :arg chunk_size: the chunk size handed to ``chunked()``
    :arg public_index: passed to ``mapping_type.get_index()``; when true
        the objects are also restricted to
        ``public_indexable().privacy_level(PUBLIC)``
    :arg kwargs: accepted but not used

    """
    if getattr(settings, 'ES_DISABLED', False):
        return

    es = get_es()
    model = mapping_type.get_model()
    # The target index depends only on ``public_index``, so it is looked
    # up once instead of once per chunk.
    index = mapping_type.get_index(public_index)

    for id_list in chunked(ids, chunk_size):
        qs = model.objects.filter(id__in=id_list)
        if public_index:
            qs = qs.public_indexable().privacy_level(PUBLIC)

        documents = [mapping_type.extract_document(item.id, item)
                     for item in qs]

        # The filters above can leave a chunk with nothing to index; do not
        # send an empty bulk request in that case.
        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es, index=index)
        mapping_type.refresh_index(es)
예제 #6
0
def index_objects(mapping_type, ids, chunk_size=100, public_index=False, **kwargs):
    """Extract and bulk index documents for ``ids`` in chunks.

    Returns immediately when ``settings.ES_DISABLED`` is set to a true
    value.

    :arg mapping_type: the mapping type used to find the model, build the
        documents and pick the index
    :arg ids: the ids of the objects to index
    :arg chunk_size: the chunk size handed to ``chunked()``
    :arg public_index: forwarded to ``mapping_type.get_index()``; when
        true only objects matching
        ``public_indexable().privacy_level(PUBLIC)`` are indexed
    :arg kwargs: accepted but not used

    """
    if getattr(settings, 'ES_DISABLED', False):
        return

    es = get_es()
    model = mapping_type.get_model()
    # Loop-invariant: the index only depends on ``public_index``.
    index = mapping_type.get_index(public_index)

    for id_list in chunked(ids, chunk_size):
        qs = model.objects.filter(id__in=id_list)
        if public_index:
            qs = qs.public_indexable().privacy_level(PUBLIC)

        documents = []
        for item in qs:
            documents.append(mapping_type.extract_document(item.id, item))

        # Skip the bulk call when the queryset produced no documents, so an
        # empty bulk request is never sent.
        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es, index=index)
        mapping_type.refresh_index(es)
예제 #7
0
파일: tasks.py 프로젝트: labrepo/LabRepo
def index_objects(mapping_type, ids, chunk_size=100, es=None, index=None):
    """Bulk index the model objects with the given ids, one chunk at a time.

    Does nothing when ``settings.ES_DISABLED`` is true.

    :arg mapping_type: the mapping type for these ids; its
        ``extract_document`` may return one document or a list of them
    :arg ids: the ids of the objects to index
    :arg chunk_size: the chunk size handed to ``chunked()``
    :arg es: forwarded unchanged to ``mapping_type.bulk_index()``
    :arg index: forwarded unchanged to ``mapping_type.bulk_index()``

    """
    if settings.ES_DISABLED:
        return

    # NOTE(review): a ``create_mapping(mapping_type)`` call used to sit here
    # but was disabled; presumably the mapping is created elsewhere.
    # Get the model this mapping type is based on.
    model = mapping_type.get_model()
    # Retrieve all the objects that we're going to index and do it in
    # bulk.
    for id_list in chunked(ids, chunk_size):
        documents = []

        for obj in model.objects.filter(id__in=id_list):
            try:
                doc = mapping_type.extract_document(obj.id, obj)
            except Exception as exc:
                # ``Exception`` (not the Python 2 only ``StandardError``) and
                # the parenthesised print keep this working on Python 2 and 3.
                # The failing object is reported and skipped.
                print('Unable to extract document {0}: {1}'.format(obj, repr(exc)))
            else:
                # One object may expand to several documents.
                if isinstance(doc, list):
                    documents.extend(doc)
                else:
                    documents.append(doc)

        # Only issue a bulk request when something was extracted.
        if documents:
            mapping_type.bulk_index(documents, id_field='id', es=es, index=index)