def insert(collection_name, docs, check_keys, safe, last_error_args,
           continue_on_error, uuid_subtype):
    """Get an **insert** message.
    """
    max_bson_size = 0
    options = 0
    if continue_on_error:
        options += 1
    data = struct.pack("<i", options)
    data += bson._make_c_string(collection_name)
    encoded = [bson.BSON.encode(doc, check_keys, uuid_subtype) for doc in docs]
    if not encoded:
        raise InvalidOperation("cannot do an empty bulk insert")
    max_bson_size = max(map(len, encoded))
    data += "".join(encoded)
    if safe:
        (_, insert_message) = __pack_message(2002, data)
        (request_id, error_message, _) = __last_error(last_error_args)
        return (request_id, insert_message + error_message, max_bson_size)
    else:
        (request_id, insert_message) = __pack_message(2002, data)
        return (request_id, insert_message, max_bson_size)
def limit(self, limit):
    """Limits the number of results to be returned by this cursor.

    Raises :exc:`TypeError` if `limit` is not an integer. Raises
    :exc:`~pymongo.errors.InvalidOperation` if this :class:`Cursor`
    has already been used. The last `limit` applied to this cursor
    takes precedence. A limit of ``0`` is equivalent to no limit.

    :Parameters:
      - `limit`: the number of results to return

    .. mongodoc:: limit
    """
    if not isinstance(limit, int):
        raise TypeError("limit must be an integer")
    if self.__exhaust:
        raise InvalidOperation("Can't use limit and exhaust together.")
    self.__check_okay_to_chain()

    self.__empty = False
    self.__limit = limit
    return self
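# Illustrative usage of the limit() method above -- a minimal sketch assuming a
# reachable mongod and a hypothetical "test.items" collection.
from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient()
cursor = client.test.items.find().limit(100).limit(5)  # the last limit() wins: 5
docs = list(cursor)            # at most 5 documents; limit(0) would mean "no limit"
try:
    cursor.limit(10)           # cursor already used, so this raises
except InvalidOperation:
    pass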
def _apply_to(self, command, is_retryable, read_preference):
    self._check_ended()

    self._server_session.last_use = monotonic.time()
    command['lsid'] = self._server_session.session_id

    if not self._in_transaction:
        self._transaction.state = _TxnState.NONE
        if is_retryable:
            self._server_session._transaction_id += 1
            command['txnNumber'] = self._server_session.transaction_id
            return

    if self._in_transaction:
        if read_preference != ReadPreference.PRIMARY:
            raise InvalidOperation(
                'read preference in a transaction must be primary, not: '
                '%r' % (read_preference,))

        if self._transaction.state == _TxnState.STARTING:
            # First command begins a new transaction.
            self._transaction.state = _TxnState.IN_PROGRESS
            command['startTransaction'] = True

            if self._transaction.opts.read_concern:
                rc = self._transaction.opts.read_concern.document
            else:
                rc = {}

            if (self.options.causal_consistency
                    and self.operation_time is not None):
                rc['afterClusterTime'] = self.operation_time

            if rc:
                command['readConcern'] = rc

        command['txnNumber'] = self._server_session.transaction_id
        command['autocommit'] = False
def address(self):
    """(host, port) of the current standalone, primary, or mongos, or None.

    Accessing :attr:`address` raises :exc:`~.errors.InvalidOperation` if
    the client is load-balancing among mongoses, since there is no single
    address. Use :attr:`nodes` instead.

    If the client is not connected, this will block until a connection is
    established or raise ServerSelectionTimeoutError if no server is
    available.

    .. versionadded:: 3.0
    """
    topology_type = self._topology._description.topology_type
    if topology_type == TOPOLOGY_TYPE.Sharded:
        raise InvalidOperation(
            'Cannot use "address" property when load balancing among'
            ' mongoses, use "nodes" instead.')
    if topology_type not in (TOPOLOGY_TYPE.ReplicaSetWithPrimary,
                             TOPOLOGY_TYPE.Single):
        return None
    return self._server_property('address')
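# Illustrative use of the address property above -- a sketch assuming a
# standalone server on localhost; a sharded (mongos) topology raises instead.
from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient("mongodb://localhost:27017")
try:
    print(client.address)        # e.g. ('localhost', 27017)
except InvalidOperation:
    print(sorted(client.nodes))  # load balancing among mongoses: use nodes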
def _finish_transaction(self, command_name):
    self._check_ended()
    if not self._in_transaction_or_auto_start():
        raise InvalidOperation("No transaction started")

    try:
        if self._server_session.statement_id == 0:
            # Not really started.
            self._server_session._transaction_id += 1
            return

        # TODO: retryable. And it's weird to pass parse_write_concern_error
        # from outside database.py.
        self._client.admin.command(
            command_name,
            session=self,
            write_concern=self._transaction.opts.write_concern,
            parse_write_concern_error=True)
    finally:
        self._server_session.reset_transaction()
        self._transaction = None
def _finish_transaction(self, command_name):
    self._check_ended()
    if not self._in_transaction_or_auto_start():
        raise InvalidOperation("No transaction started")

    try:
        if not self._transaction.sent_command:
            # Not really started.
            return

        # TODO: commitTransaction should be a retryable write.
        # Use _command directly because commit/abort are writes and must
        # always go to the primary.
        with self._client._socket_for_writes() as sock_info:
            return self._client.admin._command(
                sock_info,
                command_name,
                session=self,
                write_concern=self._transaction.opts.write_concern,
                parse_write_concern_error=True)
    finally:
        self._transaction = None
async def get(self, **kwargs):
    if not self.table:
        raise InvalidOperation("No table")

    # Pop the control keywords; whatever remains in kwargs is the filter
    # unless an explicit "query" was given.
    query = kwargs.pop("query", kwargs)
    sort = kwargs.pop("sort", None)
    many = kwargs.pop("many", False)
    limit = kwargs.pop("limit", None)

    if many:
        cursor = self.table.find(query)
        if sort:
            cursor = cursor.sort(sort)
        data = await cursor.to_list(limit)
    elif sort:
        docs = await self.table.find(query).sort(sort).to_list(1)
        data = docs[0] if docs else None
    else:
        data = await self.table.find_one(query)

    if not data:
        raise NotFound(query)
    self.load(data, many)
def execute_op_msg_no_results(self, sock_info, generator):
    """Execute write commands with OP_MSG and w=0 writeConcern, unordered.
    """
    db_name = self.collection.database.name
    client = self.collection.database.client
    listeners = client._event_listeners
    op_id = _randint()

    if not self.current_run:
        self.current_run = next(generator)
    run = self.current_run

    while run:
        cmd = SON([(_COMMANDS[run.op_type], self.collection.name),
                   ('ordered', False),
                   ('writeConcern', {'w': 0})])
        bwc = _BulkWriteContext(db_name, cmd, sock_info, op_id,
                                listeners, None)

        while run.idx_offset < len(run.ops):
            check_keys = run.op_type == _INSERT
            ops = islice(run.ops, run.idx_offset, None)
            # Run as many ops as possible.
            request_id, msg, to_send = _do_bulk_write_command(
                self.namespace, run.op_type, cmd, ops, check_keys,
                self.collection.codec_options, bwc)
            if not to_send:
                raise InvalidOperation("cannot do an empty bulk write")
            run.idx_offset += len(to_send)
            # Though this isn't strictly a "legacy" write, the helper
            # handles publishing commands and sending our message
            # without receiving a result. Send 0 for max_doc_size
            # to disable size checking. Size checking is handled while
            # the documents are encoded to BSON.
            bwc.legacy_write(request_id, msg, 0, False, to_send)
        self.current_run = run = next(generator, None)
def insert(collection_name, docs, check_keys, safe, last_error_args,
           continue_on_error, uuid_subtype, codec_options=None):
    """Get an **insert** message.

    .. note:: As of PyMongo 2.6, this function is no longer used. It
       is being kept (with tests) for backwards compatibility with 3rd
       party libraries that may currently be using it, but will likely
       be removed in a future release.
    """
    if codec_options is not None:
        uuid_subtype = codec_options.uuid_representation
    options = 0
    if continue_on_error:
        options += 1
    data = struct.pack("<i", options)
    data += bson._make_c_string(collection_name)
    encoded = [bson.BSON.encode(doc, check_keys, uuid_subtype) for doc in docs]
    if not encoded:
        raise InvalidOperation("cannot do an empty bulk insert")
    max_bson_size = max(list(map(len, encoded)))
    data += _EMPTY.join(encoded)
    if safe:
        (_, insert_message) = __pack_message(2002, data)
        (request_id, error_message, _) = __last_error(collection_name,
                                                      last_error_args)
        return (request_id, insert_message + error_message, max_bson_size)
    else:
        (request_id, insert_message) = __pack_message(2002, data)
        return (request_id, insert_message, max_bson_size)
def start_transaction(self, read_concern=None, write_concern=None,
                      read_preference=None, max_commit_time_ms=None):
    """Start a multi-statement transaction.

    Takes the same arguments as :class:`TransactionOptions`.

    .. versionchanged:: 3.9
       Added the ``max_commit_time_ms`` option.

    .. versionadded:: 3.7
    """
    self._check_ended()

    if self.in_transaction:
        raise InvalidOperation("Transaction already in progress")

    read_concern = self._inherit_option("read_concern", read_concern)
    write_concern = self._inherit_option("write_concern", write_concern)
    read_preference = self._inherit_option("read_preference", read_preference)
    if max_commit_time_ms is None:
        opts = self.options.default_transaction_options
        if opts:
            max_commit_time_ms = opts.max_commit_time_ms

    self._transaction.opts = TransactionOptions(
        read_concern, write_concern, read_preference, max_commit_time_ms)
    self._transaction.reset()
    self._transaction.state = _TxnState.STARTING
    self._start_retryable_write()
    return _TransactionContext(self)
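# Hedged usage sketch for start_transaction() as documented above (the
# PyMongo 3.9+ form with max_commit_time_ms); assumes a replica set at this
# URI and a hypothetical "test.orders" collection, since transactions require
# a replica set or sharded cluster.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017/?replicaSet=rs0")
with client.start_session() as session:
    with session.start_transaction(max_commit_time_ms=5000):
        client.test.orders.insert_one({"sku": "a1"}, session=session)
        # Calling session.start_transaction() again here would raise
        # InvalidOperation("Transaction already in progress").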
def start_transaction(self, read_concern=None, write_concern=None,
                      read_preference=None):
    """Start a multi-statement transaction.

    Takes the same arguments as :class:`TransactionOptions`.

    .. versionadded:: 3.7
    """
    self._check_ended()

    if self._in_transaction:
        raise InvalidOperation("Transaction already in progress")

    read_concern = self._inherit_option("read_concern", read_concern)
    write_concern = self._inherit_option("write_concern", write_concern)
    read_preference = self._inherit_option("read_preference", read_preference)

    self._transaction = _Transaction(
        TransactionOptions(read_concern, write_concern, read_preference))
    self._server_session._transaction_id += 1
    return _TransactionContext(self)
def _ensure_opened(self):
    """Start monitors, or restart after a fork.

    Hold the lock when calling this.
    """
    if self._closed:
        raise InvalidOperation("Cannot use MongoClient after close")

    if not self._opened:
        self._opened = True
        self._update_servers()

        # Start or restart the events publishing thread.
        if self._publish_tp or self._publish_server:
            self.__events_executor.open()

        # Start the SRV polling thread.
        if self._srv_monitor and (self.description.topology_type in
                                  SRV_POLLING_TOPOLOGIES):
            self._srv_monitor.open()

        if self._settings.load_balanced:
            # Emit initial SDAM events for load balancer mode.
            self._process_change(
                ServerDescription(
                    self._seed_addresses[0],
                    Hello({
                        "ok": 1,
                        "serviceId": self._topology_id,
                        "maxWireVersion": 13
                    }),
                ))

    # Ensure that the monitors are open.
    for server in self._servers.values():
        server.open()
def start_transaction(self, read_concern=None, write_concern=None,
                      read_preference=None):
    """Start a multi-statement transaction.

    Takes the same arguments as :class:`TransactionOptions`.

    .. versionadded:: 3.7
    """
    self._check_ended()

    if self._in_transaction:
        raise InvalidOperation("Transaction already in progress")

    read_concern = self._inherit_option("read_concern", read_concern)
    write_concern = self._inherit_option("write_concern", write_concern)
    read_preference = self._inherit_option(
        "read_preference", read_preference)

    self._transaction.opts = TransactionOptions(
        read_concern, write_concern, read_preference)
    self._transaction.reset()
    self._transaction.state = _TxnState.STARTING
    self._start_retryable_write()
    return _TransactionContext(self)
def _do_batched_insert(collection_name, docs, check_keys,
                       safe, last_error_args, continue_on_error, opts,
                       ctx):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(
                collection_name, last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(bson._make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    to_send = []
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, opts)
        encoded_length = len(encoded)
        too_large = (encoded_length > ctx.max_bson_size)

        message_length += encoded_length
        if message_length < ctx.max_message_size and not too_large:
            data.write(encoded)
            to_send.append(doc)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                request_id, msg = _insert_message(data.getvalue(), send_safe)
                ctx.legacy_write(request_id, msg, 0, send_safe, to_send)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the final
                    # batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            _raise_document_too_large(
                "insert", encoded_length, ctx.max_bson_size)

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)
        to_send = [doc]

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    request_id, msg = _insert_message(data.getvalue(), safe)
    ctx.legacy_write(request_id, msg, 0, safe, to_send)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
def _check_ended(self):
    if self._server_session is None:
        raise InvalidOperation("Cannot use ended session")
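# Sketch of the error path guarded by _check_ended() above: any attempt to
# use a session after end_session() raises InvalidOperation. Assumes a
# reachable mongod and a hypothetical "test.items" collection.
from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient()
session = client.start_session()
session.end_session()
try:
    client.test.items.find_one(session=session)
except InvalidOperation as exc:
    print(exc)  # Cannot use ended session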
def _copy_database(fromdb, todb, fromhost, mechanism, username, password,
                   sock_info, cmd_func):
    """Copy a database, perhaps from a remote host.

    :Parameters:
      - `fromdb`: Source database.
      - `todb`: Target database.
      - `fromhost`: Source host like 'foo.com', 'foo.com:27017', or None.
      - `mechanism`: An authentication mechanism.
      - `username`: A str or unicode, or None.
      - `password`: A str or unicode, or None.
      - `sock_info`: A SocketInfo instance.
      - `cmd_func`: A callback taking args sock_info, database, command doc.
    """
    if not isinstance(fromdb, str):
        raise TypeError('from_name must be an instance '
                        'of %s' % (str.__name__,))
    if not isinstance(todb, str):
        raise TypeError('to_name must be an instance '
                        'of %s' % (str.__name__,))

    _check_database_name(todb)

    warnings.warn(
        "copy_database is deprecated. Use the raw 'copydb' command"
        " or db.copyDatabase() in the mongo shell. See"
        " doc/examples/copydb.",
        DeprecationWarning, stacklevel=2)

    # It would be better if the user told us what mechanism to use, but for
    # backwards compatibility with earlier PyMongos we don't require the
    # mechanism. Hope 'fromhost' runs the same version as the target.
    if mechanism == 'DEFAULT':
        if sock_info.max_wire_version >= 3:
            mechanism = 'SCRAM-SHA-1'
        else:
            mechanism = 'MONGODB-CR'

    if username is not None:
        if mechanism == 'SCRAM-SHA-1':
            credentials = auth._build_credentials_tuple(mech=mechanism,
                                                        source='admin',
                                                        user=username,
                                                        passwd=password,
                                                        extra=None)
            try:
                auth._copydb_scram_sha1(credentials=credentials,
                                        sock_info=sock_info,
                                        cmd_func=cmd_func,
                                        fromdb=fromdb,
                                        todb=todb,
                                        fromhost=fromhost)
            except OperationFailure as exc:
                errmsg = exc.details and exc.details.get('errmsg') or ''
                if 'no such cmd: saslStart' in errmsg:
                    explanation = (
                        "%s doesn't support SCRAM-SHA-1, pass"
                        " mechanism='MONGODB-CR' to copy_database" % fromhost)
                    raise OperationFailure(explanation, exc.code, exc.details)
                else:
                    raise
        elif mechanism == 'MONGODB-CR':
            get_nonce_cmd = SON([('copydbgetnonce', 1),
                                 ('fromhost', fromhost)])
            get_nonce_response, _ = cmd_func(sock_info, 'admin', get_nonce_cmd)
            nonce = get_nonce_response['nonce']
            copydb_cmd = SON([('copydb', 1),
                              ('fromdb', fromdb),
                              ('todb', todb)])
            copydb_cmd['username'] = username
            copydb_cmd['nonce'] = nonce
            copydb_cmd['key'] = auth._auth_key(nonce, username, password)
            if fromhost is not None:
                copydb_cmd['fromhost'] = fromhost
            cmd_func(sock_info, 'admin', copydb_cmd)
        else:
            raise InvalidOperation('Authentication mechanism %r not supported'
                                   ' for copy_database' % mechanism)
    else:
        # No username.
        copydb_cmd = SON([('copydb', 1),
                          ('fromdb', fromdb),
                          ('todb', todb)])
        if fromhost:
            copydb_cmd['fromhost'] = fromhost
        cmd_func(sock_info, 'admin', copydb_cmd)
def __send_message(self, operation):
    """Send a query or getmore operation and handle the response.

    If operation is ``None`` this is an exhaust cursor, which reads
    the next result batch off the exhaust socket instead of
    sending getMore messages to the server.

    Can raise ConnectionFailure.
    """
    client = self.__collection.database.client
    # OP_MSG is required to support exhaust cursors with encryption.
    if client._encrypter and self.__exhaust:
        raise InvalidOperation(
            "exhaust cursors do not support auto encryption")

    try:
        response = client._run_operation_with_response(
            operation, self._unpack_response, exhaust=self.__exhaust,
            address=self.__address)
    except OperationFailure:
        self.__killed = True

        # Make sure exhaust socket is returned immediately, if necessary.
        self.__die()

        # If this is a tailable cursor the error is likely
        # due to capped collection roll over. Setting
        # self.__killed to True ensures Cursor.alive will be
        # False. No need to re-raise.
        if self.__query_flags & _QUERY_OPTIONS["tailable_cursor"]:
            return
        raise
    except NotMasterError:
        # Don't send kill cursors to another server after a "not master"
        # error. It's completely pointless.
        self.__killed = True

        # Make sure exhaust socket is returned immediately, if necessary.
        self.__die()
        raise
    except ConnectionFailure:
        # Don't try to send kill cursors on another socket
        # or to another server. It can cause a _pinValue
        # assertion on some server releases if we get here
        # due to a socket timeout.
        self.__killed = True
        self.__die()
        raise
    except Exception:
        # Close the cursor
        self.__die()
        raise

    self.__address = response.address
    if self.__exhaust and not self.__exhaust_mgr:
        # 'response' is an ExhaustResponse.
        self.__exhaust_mgr = _SocketManager(response.socket_info,
                                            response.pool)

    cmd_name = operation.name
    docs = response.docs
    if response.from_command:
        if cmd_name != "explain":
            cursor = docs[0]['cursor']
            self.__id = cursor['id']
            if cmd_name == 'find':
                documents = cursor['firstBatch']
                # Update the namespace used for future getMore commands.
                ns = cursor.get('ns')
                if ns:
                    self.__dbname, self.__collname = ns.split('.', 1)
            else:
                documents = cursor['nextBatch']
            self.__data = deque(documents)
            self.__retrieved += len(documents)
        else:
            self.__id = 0
            self.__data = deque(docs)
            self.__retrieved += len(docs)
    else:
        self.__id = response.data.cursor_id
        self.__data = deque(docs)
        self.__retrieved += response.data.number_returned

    if self.__id == 0:
        self.__killed = True
        # Don't wait for garbage collection to call __del__, return the
        # socket and the session to the pool now.
        self.__die()

    if self.__limit and self.__id and self.__limit <= self.__retrieved:
        self.__die()
def __getitem__(self, index):
    raise InvalidOperation("Cannot call __getitem__ on RawBatchCursor")
def _check_closed(self):
    if self._encryption is None:
        raise InvalidOperation("Cannot use closed ClientEncryption")
def __check_okay_to_chain(self):
    """Check if it is okay to chain more options onto this cursor.
    """
    if self.__retrieved or self.__id is not None:
        raise InvalidOperation("cannot set options after executing query")
def _execute_command(self, generator, write_concern, session,
                     sock_info, op_id, retryable, full_result):
    if sock_info.max_wire_version < 5 and self.uses_collation:
        raise ConfigurationError(
            'Must be connected to MongoDB 3.4+ to use a collation.')
    if sock_info.max_wire_version < 6 and self.uses_array_filters:
        raise ConfigurationError(
            'Must be connected to MongoDB 3.6+ to use arrayFilters.')

    db_name = self.collection.database.name
    client = self.collection.database.client
    listeners = client._event_listeners

    if not self.current_run:
        self.current_run = next(generator)
    run = self.current_run

    # sock_info.command validates the session, but we use
    # sock_info.write_command.
    sock_info.validate_session(client, session)
    while run:
        cmd = SON([(_COMMANDS[run.op_type], self.collection.name),
                   ('ordered', self.ordered)])
        if not write_concern.is_server_default:
            cmd['writeConcern'] = write_concern.document
        if self.bypass_doc_val and sock_info.max_wire_version >= 4:
            cmd['bypassDocumentValidation'] = True
        bwc = _BulkWriteContext(db_name, cmd, sock_info, op_id,
                                listeners, session)

        while run.idx_offset < len(run.ops):
            if session:
                session._apply_to(cmd, retryable, ReadPreference.PRIMARY)
            sock_info.send_cluster_time(cmd, session, client)
            check_keys = run.op_type == _INSERT
            ops = islice(run.ops, run.idx_offset, None)
            # Run as many ops as possible.
            request_id, msg, to_send = _do_bulk_write_command(
                self.namespace, run.op_type, cmd, ops, check_keys,
                self.collection.codec_options, bwc)
            if not to_send:
                raise InvalidOperation("cannot do an empty bulk write")
            result = bwc.write_command(request_id, msg, to_send)
            client._receive_cluster_time(result, session)

            # Retryable writeConcernErrors halt the execution of this run.
            wce = result.get('writeConcernError', {})
            if wce.get('code', 0) in _RETRYABLE_ERROR_CODES:
                # Synthesize the full bulk result without modifying the
                # current one because this write operation may be retried.
                full = copy.deepcopy(full_result)
                _merge_command(run, full, run.idx_offset, result)
                _raise_bulk_write_error(full)

            _merge_command(run, full_result, run.idx_offset, result)
            # We're no longer in a retry once a command succeeds.
            self.retrying = False

            if self.ordered and "writeErrors" in result:
                break
            run.idx_offset += len(to_send)

        # We're supposed to continue if errors are
        # at the write concern level (e.g. wtimeout)
        if self.ordered and full_result['writeErrors']:
            break
        # Reset our state
        self.current_run = run = next(generator, None)
def __init__(self, collection, spec=None, fields=None, skip=0, limit=0,
             timeout=True, snapshot=False, tailable=False, sort=None,
             max_scan=None, as_class=None, slave_okay=False,
             await_data=False, partial=False, manipulate=True,
             read_preference=ReadPreference.PRIMARY, tag_sets=[{}],
             secondary_acceptable_latency_ms=None, exhaust=False,
             compile_re=True, oplog_replay=False, modifiers=None,
             _must_use_master=False, _codec_options=None, **kwargs):
    """Create a new cursor.

    Should not be called directly by application developers - see
    :meth:`~pymongo.collection.Collection.find` instead.

    .. mongodoc:: cursors
    """
    # Backport aliases.
    if 'filter' in kwargs:
        spec = kwargs['filter']
    if 'projection' in kwargs:
        fields = kwargs['projection']
    if 'no_cursor_timeout' in kwargs:
        timeout = not kwargs['no_cursor_timeout']
    if 'allow_partial_results' in kwargs:
        partial = kwargs['allow_partial_results']
    if 'cursor_type' in kwargs:
        crt = kwargs['cursor_type']
        if crt not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                       CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
            raise ValueError("not a valid value for cursor_type")
        exhaust = crt == CursorType.EXHAUST
        tailable = crt == CursorType.TAILABLE
        if crt == CursorType.TAILABLE_AWAIT:
            await_data = True
            tailable = True

    if modifiers is not None:
        if not isinstance(modifiers, dict):
            raise TypeError("%s must be an instance of dict or subclass"
                            % (modifiers,))
        if '$snapshot' in modifiers:
            snapshot = modifiers['$snapshot']
        if '$maxScan' in modifiers:
            max_scan = modifiers['$maxScan']

    self.__id = None

    if spec is None:
        spec = {}

    if not isinstance(spec, dict):
        raise TypeError("spec must be an instance of dict")
    if not isinstance(skip, int):
        raise TypeError("skip must be an instance of int")
    if not isinstance(limit, int):
        raise TypeError("limit must be an instance of int")
    if not isinstance(timeout, bool):
        raise TypeError("timeout must be an instance of bool")
    if not isinstance(snapshot, bool):
        raise TypeError("snapshot must be an instance of bool")
    if not isinstance(tailable, bool):
        raise TypeError("tailable must be an instance of bool")
    if not isinstance(slave_okay, bool):
        raise TypeError("slave_okay must be an instance of bool")
    if not isinstance(await_data, bool):
        raise TypeError("await_data must be an instance of bool")
    if not isinstance(partial, bool):
        raise TypeError("partial must be an instance of bool")
    if not isinstance(exhaust, bool):
        raise TypeError("exhaust must be an instance of bool")
    if not isinstance(oplog_replay, bool):
        raise TypeError("oplog_replay must be an instance of bool")

    if fields is not None:
        if not fields:
            fields = {"_id": 1}
        if not isinstance(fields, dict):
            fields = helpers._fields_list_to_dict(fields)

    self.__collection = collection
    self.__spec = spec
    self.__fields = fields
    self.__skip = skip
    self.__limit = limit
    self.__max_time_ms = None
    self.__batch_size = 0
    self.__max = None
    self.__min = None
    self.__modifiers = modifiers and modifiers.copy() or {}

    # Exhaust cursor support
    if self.__collection.database.connection.is_mongos and exhaust:
        raise InvalidOperation('Exhaust cursors are '
                               'not supported by mongos')
    if limit and exhaust:
        raise InvalidOperation("Can't use limit and exhaust together.")
    self.__exhaust = exhaust
    self.__exhaust_mgr = None

    # This is ugly. People want to be able to do cursor[5:5] and
    # get an empty result set (old behavior was an
    # exception). It's hard to do that right, though, because the
    # server uses limit(0) to mean 'no limit'. So we set __empty
    # in that case and check for it when iterating. We also unset
    # it anytime we change __limit.
    self.__empty = False

    self.__snapshot = snapshot
    self.__ordering = sort and helpers._index_document(sort) or None
    self.__max_scan = max_scan
    self.__explain = False
    self.__hint = None
    self.__comment = None
    self.__slave_okay = slave_okay
    self.__manipulate = manipulate
    self.__read_preference = read_preference
    self.__tag_sets = tag_sets
    self.__secondary_acceptable_latency_ms = secondary_acceptable_latency_ms
    self.__compile_re = compile_re
    self.__must_use_master = _must_use_master

    copts = _codec_options or collection.codec_options
    if as_class is not None:
        copts = _CodecOptions(
            as_class, copts.tz_aware, copts.uuid_representation)
    self.__codec_options = copts

    self.__data = deque()
    self.__connection_id = None
    self.__retrieved = 0
    self.__killed = False

    self.__query_flags = 0
    if tailable:
        self.__query_flags |= _QUERY_OPTIONS["tailable_cursor"]
    if not timeout:
        self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
    if tailable and await_data:
        self.__query_flags |= _QUERY_OPTIONS["await_data"]
    if exhaust:
        self.__query_flags |= _QUERY_OPTIONS["exhaust"]
    if partial:
        self.__query_flags |= _QUERY_OPTIONS["partial"]
    if oplog_replay:
        self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]

    # this is for passing network_timeout through if it's specified
    # need to use kwargs as None is a legit value for network_timeout
    self.__kwargs = kwargs
def try_next(self):
    """Advance the cursor without blocking indefinitely.

    This method returns the next change document without waiting
    indefinitely for the next change. For example::

        with db.collection.watch() as stream:
            while stream.alive:
                change = stream.try_next()
                # Note that the ChangeStream's resume token may be updated
                # even when no changes are returned.
                print("Current resume token: %r" % (stream.resume_token,))
                if change is not None:
                    print("Change document: %r" % (change,))
                    continue
                # We end up here when there are no recent changes.
                # Sleep for a while before trying again to avoid flooding
                # the server with getMore requests when no changes are
                # available.
                time.sleep(10)

    If no change document is cached locally then this method runs a single
    getMore command. If the getMore yields any documents, the next
    document is returned, otherwise, if the getMore returns no documents
    (because there have been no changes) then ``None`` is returned.

    :Returns:
      The next change document or ``None`` when no document is available
      after running a single getMore or when the cursor is closed.

    .. versionadded:: 3.8
    """
    # Attempt to get the next change with at most one getMore and at most
    # one resume attempt.
    try:
        change = self._cursor._try_next(True)
    except ConnectionFailure:
        self._resume()
        change = self._cursor._try_next(False)
    except OperationFailure as exc:
        if (exc.code in _NON_RESUMABLE_GETMORE_ERRORS or
                exc.has_error_label("NonResumableChangeStreamError")):
            raise
        self._resume()
        change = self._cursor._try_next(False)

    # If no changes are available.
    if change is None:
        # We have either iterated over all documents in the cursor,
        # OR the most-recently returned batch is empty. In either case,
        # update the cached resume token with the postBatchResumeToken if
        # one was returned. We also clear the startAtOperationTime.
        if self._cursor._post_batch_resume_token is not None:
            self._resume_token = self._cursor._post_batch_resume_token
            self._start_at_operation_time = None
        return change

    # Else, changes are available.
    try:
        resume_token = change['_id']
    except KeyError:
        self.close()
        raise InvalidOperation(
            "Cannot provide resume functionality when the resume "
            "token is missing.")

    # If this is the last change document from the current batch, cache the
    # postBatchResumeToken.
    if (not self._cursor._has_next() and
            self._cursor._post_batch_resume_token):
        resume_token = self._cursor._post_batch_resume_token

    # Hereafter, don't use startAfter; instead use resumeAfter.
    self._uses_start_after = False
    self._uses_resume_after = True

    # Cache the resume token and clear startAtOperationTime.
    self._resume_token = resume_token
    self._start_at_operation_time = None

    if self._decode_custom:
        return _bson_to_dict(change.raw, self._orig_codec_options)
    return change
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, opts, ctx):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    to_send = []

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = _randint()
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))

        return ctx.write_command(request_id, buf.getvalue(), to_send)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Send a batch?
        enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(
                    write_op, len(value), max_bson_size)
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b'0'
            to_send = []
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
def _disallow_transactions(session):
    if session and session.in_transaction:
        raise InvalidOperation(
            'GridFS does not support multi-document transactions')
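# Sketch of the restriction enforced by _disallow_transactions() above:
# passing an in-transaction session to a GridFS operation raises
# InvalidOperation. Assumes a replica set at this URI and the gridfs package.
import gridfs
from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient("mongodb://localhost:27017/?replicaSet=rs0")
fs = gridfs.GridFS(client.test)
with client.start_session() as session:
    with session.start_transaction():
        try:
            fs.put(b"data", filename="f.bin", session=session)
        except InvalidOperation as exc:
            print(exc)  # GridFS does not support multi-document transactions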
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, opts, ctx):
    """Create the next batched insert, update, or delete command.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4
    to_send = []
    idx = 0
    for doc in docs:
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Is there enough room to add this document? max_cmd_size accounts for
        # the two trailing null bytes.
        enough_data = (buf.tell() + len(key) + len(value)) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(
                    write_op, len(value), max_bson_size)
            break
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    # Finalize the current OP_QUERY message.
    # Close list and command documents
    buf.write(_ZERO_16)

    # Write document lengths and request id
    length = buf.tell()
    buf.seek(list_start)
    buf.write(struct.pack('<i', length - list_start - 1))
    buf.seek(command_start)
    buf.write(struct.pack('<i', length - command_start))
    buf.seek(4)
    request_id = _randint()
    buf.write(struct.pack('<i', request_id))
    buf.seek(0)
    buf.write(struct.pack('<i', length))

    return request_id, buf.getvalue(), to_send
def check_session_auth_matches(self, session):
    """Raise error if a ClientSession is logged in as a different user."""
    if session and session._authset != self.authset:
        raise InvalidOperation('session was used after authenticating'
                               ' with different credentials')
def __init__(self, collection, filter=None, projection=None, skip=0,
             limit=0, no_cursor_timeout=False,
             cursor_type=CursorType.NON_TAILABLE,
             sort=None, allow_partial_results=False, oplog_replay=False,
             modifiers=None, batch_size=0, collation=None,
             hint=None, max_scan=None, max_time_ms=None, max=None, min=None,
             return_key=False, show_record_id=False, snapshot=False,
             comment=None, session=None, allow_disk_use=None):
    """Create a new cursor.

    Should not be called directly by application developers - see
    :meth:`~pymongo.collection.Collection.find` instead.

    .. mongodoc:: cursors
    """
    # Initialize all attributes used in __del__ before possibly raising
    # an error to avoid attribute errors during garbage collection.
    self.__id = None
    self.__exhaust = False
    self.__exhaust_mgr = None
    self.__killed = False

    if session:
        self.__session = session
        self.__explicit_session = True
    else:
        self.__session = None
        self.__explicit_session = False

    spec = filter
    if spec is None:
        spec = {}

    validate_is_mapping("filter", spec)
    if not isinstance(skip, int):
        raise TypeError("skip must be an instance of int")
    if not isinstance(limit, int):
        raise TypeError("limit must be an instance of int")
    validate_boolean("no_cursor_timeout", no_cursor_timeout)
    if no_cursor_timeout and not self.__explicit_session:
        warnings.warn(
            "use an explicit session with no_cursor_timeout=True "
            "otherwise the cursor may still timeout after "
            "30 minutes, for more info see "
            "https://docs.mongodb.com/v4.4/reference/method/"
            "cursor.noCursorTimeout/"
            "#session-idle-timeout-overrides-nocursortimeout",
            UserWarning,
            stacklevel=2)
    if cursor_type not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                           CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
        raise ValueError("not a valid value for cursor_type")
    validate_boolean("allow_partial_results", allow_partial_results)
    validate_boolean("oplog_replay", oplog_replay)
    if modifiers is not None:
        warnings.warn("the 'modifiers' parameter is deprecated",
                      DeprecationWarning, stacklevel=2)
        validate_is_mapping("modifiers", modifiers)
    if not isinstance(batch_size, int):
        raise TypeError("batch_size must be an integer")
    if batch_size < 0:
        raise ValueError("batch_size must be >= 0")

    # Only set if allow_disk_use is provided by the user, else None.
    if allow_disk_use is not None:
        allow_disk_use = validate_boolean("allow_disk_use", allow_disk_use)

    if projection is not None:
        if not projection:
            projection = {"_id": 1}
        projection = helpers._fields_list_to_dict(projection, "projection")

    self.__collection = collection
    self.__spec = spec
    self.__projection = projection
    self.__skip = skip
    self.__limit = limit
    self.__batch_size = batch_size
    self.__modifiers = modifiers and modifiers.copy() or {}
    self.__ordering = sort and helpers._index_document(sort) or None
    self.__max_scan = max_scan
    self.__explain = False
    self.__comment = comment
    self.__max_time_ms = max_time_ms
    self.__max_await_time_ms = None
    self.__max = max
    self.__min = min
    self.__collation = validate_collation_or_none(collation)
    self.__return_key = return_key
    self.__show_record_id = show_record_id
    self.__allow_disk_use = allow_disk_use
    self.__snapshot = snapshot
    self.__set_hint(hint)

    # Exhaust cursor support
    if cursor_type == CursorType.EXHAUST:
        if self.__collection.database.client.is_mongos:
            raise InvalidOperation('Exhaust cursors are '
                                   'not supported by mongos')
        if limit:
            raise InvalidOperation("Can't use limit and exhaust together.")
        self.__exhaust = True

    # This is ugly. People want to be able to do cursor[5:5] and
    # get an empty result set (old behavior was an
    # exception). It's hard to do that right, though, because the
    # server uses limit(0) to mean 'no limit'. So we set __empty
    # in that case and check for it when iterating. We also unset
    # it anytime we change __limit.
    self.__empty = False

    self.__data = deque()
    self.__address = None
    self.__retrieved = 0

    self.__codec_options = collection.codec_options
    # Read preference is set when the initial find is sent.
    self.__read_preference = None
    self.__read_concern = collection.read_concern

    self.__query_flags = cursor_type
    if no_cursor_timeout:
        self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
    if allow_partial_results:
        self.__query_flags |= _QUERY_OPTIONS["partial"]
    if oplog_replay:
        self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]

    # The namespace to use for find/getMore commands.
    self.__dbname = collection.database.name
    self.__collname = collection.name
def __init__(self, collection, filter=None, projection=None, skip=0,
             limit=0, no_cursor_timeout=False,
             cursor_type=CursorType.NON_TAILABLE,
             sort=None, allow_partial_results=False, oplog_replay=False,
             modifiers=None, batch_size=0, manipulate=True):
    """Create a new cursor.

    Should not be called directly by application developers - see
    :meth:`~pymongo.collection.Collection.find` instead.

    .. mongodoc:: cursors
    """
    self.__id = None

    spec = filter
    if spec is None:
        spec = {}

    validate_is_mapping("filter", spec)
    if not isinstance(skip, int):
        raise TypeError("skip must be an instance of int")
    if not isinstance(limit, int):
        raise TypeError("limit must be an instance of int")
    validate_boolean("no_cursor_timeout", no_cursor_timeout)
    if cursor_type not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                           CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
        raise ValueError("not a valid value for cursor_type")
    validate_boolean("allow_partial_results", allow_partial_results)
    validate_boolean("oplog_replay", oplog_replay)
    if modifiers is not None:
        validate_is_mapping("modifiers", modifiers)
    if not isinstance(batch_size, integer_types):
        raise TypeError("batch_size must be an integer")
    if batch_size < 0:
        raise ValueError("batch_size must be >= 0")

    if projection is not None:
        if not projection:
            projection = {"_id": 1}
        projection = helpers._fields_list_to_dict(projection, "projection")

    self.__collection = collection
    self.__spec = spec
    self.__projection = projection
    self.__skip = skip
    self.__limit = limit
    self.__batch_size = batch_size
    self.__modifiers = modifiers and modifiers.copy() or {}
    self.__ordering = sort and helpers._index_document(sort) or None
    self.__max_scan = None
    self.__explain = False
    self.__hint = None
    self.__comment = None
    self.__max_time_ms = None
    self.__max_await_time_ms = None
    self.__max = None
    self.__min = None
    self.__manipulate = manipulate

    # Exhaust cursor support
    self.__exhaust = False
    self.__exhaust_mgr = None
    if cursor_type == CursorType.EXHAUST:
        if self.__collection.database.client.is_mongos:
            raise InvalidOperation('Exhaust cursors are '
                                   'not supported by mongos')
        if limit:
            raise InvalidOperation("Can't use limit and exhaust together.")
        self.__exhaust = True

    # This is ugly. People want to be able to do cursor[5:5] and
    # get an empty result set (old behavior was an
    # exception). It's hard to do that right, though, because the
    # server uses limit(0) to mean 'no limit'. So we set __empty
    # in that case and check for it when iterating. We also unset
    # it anytime we change __limit.
    self.__empty = False

    self.__data = deque()
    self.__address = None
    self.__retrieved = 0
    self.__killed = False

    self.__codec_options = collection.codec_options
    self.__read_preference = collection.read_preference
    self.__read_concern = collection.read_concern

    self.__query_flags = cursor_type
    if self.__read_preference != ReadPreference.PRIMARY:
        self.__query_flags |= _QUERY_OPTIONS["slave_okay"]
    if no_cursor_timeout:
        self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
    if allow_partial_results:
        self.__query_flags |= _QUERY_OPTIONS["partial"]
    if oplog_replay:
        self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]
def _check_closed(self):
    if self._closed:
        raise InvalidOperation("Cannot use MongoClient after close")
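# Sketch of the guard in _check_closed() above, assuming a PyMongo version in
# which close() permanently invalidates the client (4.x behavior) rather than
# allowing it to reconnect.
from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient()
client.close()
try:
    client.admin.command("ping")
except InvalidOperation as exc:
    print(exc)  # Cannot use MongoClient after close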