def render(self, data, media_type=None, renderer_context=None):
    queryset = data['queryset']
    serializer = data['serializer']
    context = data['context']

    csv_buffer = Echo()
    csv_writer = csv.writer(csv_buffer)

    # Header row
    if queryset.count():
        yield csv_writer.writerow(self.header_fields.values())

    # Need to efficiently page through querysets
    if isinstance(queryset, django.db.models.query.QuerySet):
        queryset = batched_queryset(queryset, chunksize=25)
    else:
        # This should be built into the LazySearch object, but it's not...
        queryset = LazySearchIterator(queryset)

    # Data rows
    for item in queryset:
        items = serializer(item, context=context).data
        # Order columns by `header_fields` ordering
        ordered = [items[column] for column in self.header_fields]
        yield csv_writer.writerow([
            elem.encode('utf-8') if isinstance(elem, six.text_type) and six.PY2 else elem
            for elem in ordered
        ])
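# `Echo` above is the pseudo-buffer from Django's streaming-CSV pattern: rather than
# storing what csv.writer writes, its write() hands each row straight back, which is
# what lets the renderer yield rows one at a time. A minimal sketch of that class, in
# case it isn't defined alongside the renderer:
class Echo(object):
    """A file-like object that returns, rather than buffers, whatever is written."""

    def write(self, value):
        return value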
def test_delete_during_query(es_client):
    objects = mommy.make(SimpleObject, _quantity=10)
    qs = batched_queryset(SimpleObject.objects.all(), chunksize=3)
    results = [next(qs) for _ in range(3)]

    # Delete during batched fetch
    objects[3].delete()  # Not yet fetched

    # Finish batched fetch
    results.extend(list(qs))

    # The deleted object never appears in the results
    assert results == (objects[:3] + objects[4:])
def test_create_during_query(es_client):
    objects = mommy.make(SimpleObject, _quantity=10)
    qs = batched_queryset(SimpleObject.objects.all(), chunksize=3)
    results = [next(qs) for _ in range(3)]

    # Create more objects during batched fetch
    new_objects = mommy.make(SimpleObject, _quantity=10)

    # Finish batched fetch
    results.extend(list(qs))

    # The number of chunks fetched is based on the initial object count (10), so the final
    # chunk would normally be size 1. But the last fetch grabs enough of the newly created
    # objects to fill the chunk (size 3).
    assert results == (objects + new_objects[:2])
def test_count_batch_queries(es_client):
    objects = mommy.make(SimpleObject, _quantity=10)
    try:
        settings.DEBUG = True  # Must be True for Django to track connection queries

        # 1 query to get the initial primary key, plus 1 per batch
        for chunksize, expected_queries in [(10, 2), (5, 3), (3, 5)]:
            reset_queries()
            results = list(
                batched_queryset(SimpleObject.objects.all(), chunksize=chunksize))
            assert objects == results
            assert len(connection.queries) == expected_queries
    finally:
        settings.DEBUG = False
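# `batched_queryset` itself isn't shown in this section. A hypothetical sketch consistent
# with the behavior the tests above pin down (one query for the boundary pk plus one per
# chunk, deleted rows skipped, late inserts filling the final chunk) is the classic
# primary-key pagination pattern -- not necessarily the project's actual implementation:
def batched_queryset(queryset, chunksize=1000):
    """Yield rows from `queryset` in pk-ordered chunks of up to `chunksize`."""
    # One query up front to learn where iteration should stop.
    last_pk = queryset.order_by('-pk').values_list('pk', flat=True).first()
    if last_pk is None:
        return  # Empty queryset: yield nothing

    pk = 0  # Assumes positive auto-increment primary keys
    queryset = queryset.order_by('pk')
    while pk < last_pk:
        # One query per chunk: everything after the last pk we saw.
        for row in queryset.filter(pk__gt=pk)[:chunksize]:
            pk = row.pk
            yield row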
def model_iterator(model, index=None, out=None):
    if index is None:
        index = model.search_objects.mapping.index

    counter = 0
    total = model.search_objects.count()
    if out:
        out.write("Indexing {} {} objects".format(total, model.__name__))

    for obj in batched_queryset(model.objects.all()):
        if obj.__class__ != model:
            # TODO: Come up with a better method to avoid redundant indexing
            continue
        counter += 1
        if counter % 100 == 0 and out:
            out.write("Indexed {}/{} {} objects".format(counter, total, model.__name__))
        yield {
            "_id": obj.pk,
            "_index": index,
            "_type": obj.mapping.doc_type,
            "_source": obj.to_dict(),
        }
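# The dicts yielded above match the action format expected by elasticsearch-py's bulk
# helper, so the generator can be streamed straight into it. A usage sketch, assuming
# `es` is an elasticsearch-py client (the `bulk` helper is real; the wrapper function
# name here is illustrative):
from elasticsearch.helpers import bulk

def index_model(model, es, out=None):
    # bulk() consumes the generator lazily, so the whole table is never held in memory.
    bulk(es, model_iterator(model, out=out))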
def test_various_chunk_sizes(es_client):
    objects = mommy.make(SimpleObject, _quantity=10)
    # Chunk sizes both smaller and larger than the object count yield everything in order
    for size in range(1, 12):
        assert objects == list(
            batched_queryset(SimpleObject.objects.all(), chunksize=size))
def test_empty_queryset(es_client):
    assert list(batched_queryset(SimpleObject.objects.all())) == []