def handle(self, *args, **options):
    if options['recreate']:
        reinit_index()
    elif es_conn.indices.exists(INDEX_NAME):
        # the index name already appears in the cluster's indices listing
        self.stderr.write("Index already exists; can't perform import")
        return

    fields = [
        'id',
        'action',
        'job_guid',
        'test',
        'subtest',
        'status',
        'expected',
        'message',
        'best_classification_id',
        'best_is_verified',
    ]

    failure_lines = FailureLine.objects.filter(action='test_result')

    for rows in chunked_qs(failure_lines, options['chunk_size'], fields=fields):
        inserted = bulk(rows)

        msg = 'Inserted {} documents from {} FailureLines'
        self.stdout.write(msg.format(inserted, len(rows)))

        time.sleep(options['sleep'])

    count = count_index()
    self.stdout.write('Index contains {} documents'.format(count))
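# The reinit_index(), bulk() and count_index() helpers used above are not shown
# in these snippets. Below is only a minimal sketch of how they could be built
# on the official elasticsearch-py client; the index name, connection setup and
# field mapping are assumptions for illustration, not the project's actual code.
from elasticsearch import Elasticsearch
from elasticsearch import helpers

INDEX_NAME = 'failure-lines'  # assumed index name
es_conn = Elasticsearch()     # assumed connection


def bulk(rows):
    # Turn each FailureLine row into an index action keyed by its database id,
    # send them in a single bulk request and report how many were indexed.
    actions = [
        {
            '_index': INDEX_NAME,
            '_id': row.id,
            '_source': {
                'job_guid': row.job_guid,
                'test': row.test,
                'subtest': row.subtest,
                'status': row.status,
                'expected': row.expected,
                'message': row.message,
                'best_classification': row.best_classification_id,
                'best_is_verified': row.best_is_verified,
            },
        }
        for row in rows
    ]
    inserted, _ = helpers.bulk(es_conn, actions)
    return inserted


def count_index():
    # Total number of documents currently held in the index.
    return es_conn.count(index=INDEX_NAME)['count']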
def test_chunked_qs(test_job):
    # create 25 failure lines
    create_failure_lines(test_job, [(test_line, {}) for i in range(25)])

    qs = FailureLine.objects.all()
    chunks = list(chunked_qs(qs, chunk_size=5))

    one = chunks[0]
    two = chunks[1]
    five = chunks[4]

    assert len(one) == 5
    assert one[0].id == 1
    assert one[4].id == 5

    assert len(two) == 5
    assert two[0].id == 6
    assert two[4].id == 10

    assert len(five) == 5
    assert five[0].id == 21
    assert five[4].id == 25
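# chunked_qs() itself is not defined in these snippets. A minimal sketch of the
# behaviour the test above relies on (chunks ordered by id, each holding up to
# chunk_size rows) could look like the generator below; the exact signature and
# defaults are assumptions.
def chunked_qs(qs, chunk_size=10000, fields=None):
    """Yield lists of up to chunk_size rows from qs, walking the table by id."""
    min_id = 0

    while True:
        chunk = qs.filter(id__gt=min_id).order_by('id')

        if fields is not None:
            # only fetch the columns the caller actually needs
            chunk = chunk.only(*fields)

        # evaluate the slice now so we can read the last id below
        rows = list(chunk[:chunk_size])

        if not rows:
            break

        yield rows

        # resume after the highest id seen so far
        min_id = rows[-1].id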
def handle(self, *args, **options):
    queryset = TextLogError.objects.select_related('step').filter(job__isnull=True)
    chunk_size = options['chunk_size']

    for chunked_queryset in chunked_qs(queryset, chunk_size=chunk_size, fields=['id', 'step', 'job']):
        if not chunked_queryset:
            return

        for row in chunked_queryset:
            row.job_id = row.step.job_id

        TextLogError.objects.bulk_update(chunked_queryset, ['job'])

        logger.warning(
            'successfully added job_id in TextLogError table to rows {} to {}'
            .format(chunked_queryset[0].id, chunked_queryset[-1].id)
        )

    logger.warning(
        'successfully finished backfilling job_ids in the TextLogError table'
    )
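# The handle() above reads options['chunk_size'], so the surrounding management
# command presumably declares a --chunk-size argument. A hedged sketch of that
# boilerplate, with an assumed default and help text:
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    help = 'Backfill TextLogError.job from the related step row'

    def add_arguments(self, parser):
        parser.add_argument(
            '--chunk-size',
            dest='chunk_size',
            type=int,
            default=10000,
            help='Number of rows to update per bulk_update call',
        )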
def handle(self, *args, **options): if options["recreate"]: connection.indices.delete(TestFailureLine._doc_type.index, ignore=404) TestFailureLine.init() elif connection.indices.exists(TestFailureLine._doc_type.index): self.stderr.write("Index already exists; can't perform import") return fields = [ 'id', 'action', 'job_guid', 'test', 'subtest', 'status', 'expected', 'message', 'best_classification_id', 'best_is_verified', ] failure_lines = FailureLine.objects.filter(action='test_result') for rows in chunked_qs(failure_lines, options['chunk_size'], fields=fields): if not rows: break es_lines = [TestFailureLine.from_model(line) for line in rows] self.stdout.write("Inserting %i rows" % len(es_lines)) bulk_insert(es_lines) time.sleep(options['sleep']) count = Search(doc_type=TestFailureLine).count() self.stdout.write("Index contains %i documents" % count)
def test_chunked_qs_with_empty_qs():
    chunks = list(chunked_qs(FailureLine.objects.none()))

    assert len(chunks) == 0