def add_to_batch(self, batch: BulkWriteBatch):
    """Adds `self` to the supplied batch."""
    assert isinstance(batch, BulkWriteBatch)
    if isinstance(self, BulkWriterCreateOperation):
        return batch.create(
            reference=self.reference,
            document_data=self.document_data,
        )

    if isinstance(self, BulkWriterDeleteOperation):
        return batch.delete(
            reference=self.reference,
            option=self.option,
        )

    if isinstance(self, BulkWriterSetOperation):
        return batch.set(
            reference=self.reference,
            document_data=self.document_data,
            merge=self.merge,
        )

    if isinstance(self, BulkWriterUpdateOperation):
        return batch.update(
            reference=self.reference,
            field_updates=self.field_updates,
            option=self.option,
        )
    raise TypeError(
        f"Unexpected type of {self.__class__.__name__} for batch"
    )  # pragma: NO COVER
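# A minimal sketch of how this dispatch might be exercised: each operation
# object carries the data for a single write, and `add_to_batch` maps it onto
# the matching `BulkWriteBatch` method. The `client`/`collection` values and
# the exact constructor arguments shown below are illustrative assumptions,
# not part of this module:
#
#     from google.cloud import firestore
#
#     client = firestore.Client()
#     batch = BulkWriteBatch(client=client)
#     op = BulkWriterCreateOperation(
#         reference=client.collection("users").document("alice"),
#         document_data={"name": "Alice"},
#     )
#     op.add_to_batch(batch)  # dispatches to batch.create(...)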
def _send_until_queue_is_empty(self):
    """First domino in the sending codepath.

    This does not need to be parallelized for two reasons:

        1) Putting this on a worker thread could lead to two running in
        parallel and thus unpredictable commit ordering or failure to
        adhere to rate limits.

        2) This method only blocks when `self._request_send()` does not
        immediately return, and in that case, the BulkWriter's ramp-up /
        throttling logic has determined that it is attempting to exceed
        the maximum write speed, and so parallelizing this method would
        not increase performance anyway.

    Once `self._request_send()` returns, this method calls
    `self._send_batch()`, which parallelizes itself if that is our
    SendMode value.

    Once `self._send_batch()` has been called (which does not block if we
    are sending in parallel), this method jumps back to the top and
    re-checks for any queued batches.

    Note that for sufficiently large data migrations, this can block the
    submission of additional write operations (e.g., the CRUD methods);
    but again, that is only if the maximum write speed is being exceeded,
    and thus this scenario does not actually further reduce performance.
    """
    self._schedule_ready_retries()

    while self._queued_batches:
        # For FIFO order, add to the right of this deque (via `append`)
        # and take from the left (via `popleft`).
        operations: List[BulkWriterOperation] = self._queued_batches.popleft()

        # Block until we are cleared for takeoff, which is fine because this
        # returns instantly unless the rate limiting logic determines that we
        # are attempting to exceed the maximum write speed.
        self._request_send(len(operations))

        # Handle some bookkeeping, and ultimately put these bits on the wire.
        batch = BulkWriteBatch(client=self._client)
        op: BulkWriterOperation
        for op in operations:
            op.add_to_batch(batch)

        # `_send_batch` is optionally parallelized by `@_with_send_mode`.
        future = self._send_batch(batch=batch, operations=operations)
        self._pending_batch_futures.append(future)

        self._schedule_ready_retries()
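# The FIFO discipline above relies only on `collections.deque`: batches are
# enqueued on the right with `append` and drained from the left with
# `popleft`, so commits go out in submission order. A standalone sketch (the
# names below are illustrative, not part of this module):
#
#     from collections import deque
#
#     queued = deque()
#     queued.append(["op1"])      # enqueued first
#     queued.append(["op2"])      # enqueued second
#     while queued:
#         ops = queued.popleft()  # ["op1"] is sent before ["op2"]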
def _send(self, batch: BulkWriteBatch) -> BatchWriteResponse:
    """Hook for overriding the sending of batches. As this is only called
    from `_send_batch()`, this is parallelized if we are in that mode.
    """
    return batch.commit()  # pragma: NO COVER
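# Because `_send` is the single seam through which batches reach the server,
# a subclass can replace it, for example to capture batches in tests instead
# of committing them. A minimal sketch, assuming subclassing `BulkWriter` is
# acceptable (the `NoSendBulkWriter` name is hypothetical):
#
#     class NoSendBulkWriter(BulkWriter):
#         """Accumulates batches in memory instead of committing them."""
#
#         def __init__(self, *args, **kwargs):
#             super().__init__(*args, **kwargs)
#             self.sent_batches = []
#
#         def _send(self, batch: BulkWriteBatch) -> BatchWriteResponse:
#             # Record the batch rather than putting it on the wire.
#             self.sent_batches.append(batch)
#             return BatchWriteResponse()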
def _make_bulk_write_batch(*args, **kwargs):
    """Lazily imports and constructs a `BulkWriteBatch`."""
    from google.cloud.firestore_v1.bulk_batch import BulkWriteBatch

    return BulkWriteBatch(*args, **kwargs)