Example #1
def insert(collection_name, docs, check_keys, safe, last_error_args,
           continue_on_error, uuid_subtype):
    """Get an **insert** message.
    """
    max_bson_size = 0
    options = 0
    if continue_on_error:
        options += 1
    data = struct.pack("<i", options)
    data += bson._make_c_string(collection_name)
    encoded = [bson.BSON.encode(doc, check_keys, uuid_subtype) for doc in docs]
    if not encoded:
        raise InvalidOperation("cannot do an empty bulk insert")
    max_bson_size = max(map(len, encoded))
    data += "".join(encoded)
    if safe:
        (_, insert_message) = __pack_message(2002, data)
        (request_id, error_message, _) = __last_error(last_error_args)
        return (request_id, insert_message + error_message, max_bson_size)
    else:
        (request_id, insert_message) = __pack_message(2002, data)
        return (request_id, insert_message, max_bson_size)
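The body assembled above begins with a little-endian int32 flags field (bit 0 is continueOnError) followed by the full namespace as a NUL-terminated C string. A minimal stand-alone sketch of just that prefix, using only the standard struct module (the helper name is hypothetical):

import struct

def _insert_prefix(namespace, continue_on_error):
    # Hypothetical illustration of the prefix built above: int32 flags
    # (bit 0 = continueOnError), then the namespace encoded as a
    # NUL-terminated C string, as bson._make_c_string would produce.
    flags = 1 if continue_on_error else 0
    return struct.pack("<i", flags) + namespace.encode("utf-8") + b"\x00"

assert _insert_prefix("db.coll", True)[:4] == b"\x01\x00\x00\x00"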
Example #2
    def limit(self, limit):
        """Limits the number of results to be returned by this cursor.

        Raises :exc:`TypeError` if `limit` is not an integer. Raises
        :exc:`~pymongo.errors.InvalidOperation` if this :class:`Cursor`
        has already been used. The last `limit` applied to this cursor
        takes precedence. A limit of ``0`` is equivalent to no limit.

        :Parameters:
          - `limit`: the number of results to return

        .. mongodoc:: limit
        """
        if not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        if self.__exhaust:
            raise InvalidOperation("Can't use limit and exhaust together.")
        self.__check_okay_to_chain()

        self.__empty = False
        self.__limit = limit
        return self
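From the caller's perspective, the __check_okay_to_chain guard means limit() may only be chained before the cursor is executed. A minimal usage sketch, assuming a reachable mongod and an illustrative people collection:

from pymongo import MongoClient
from pymongo.errors import InvalidOperation

coll = MongoClient().test.people
cursor = coll.find().limit(5)   # fine: the query has not run yet
next(cursor, None)              # executing the query "uses" the cursor
try:
    cursor.limit(10)            # options can no longer be chained
except InvalidOperation as exc:
    print(exc)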
Example #3
    def _apply_to(self, command, is_retryable, read_preference):
        self._check_ended()

        self._server_session.last_use = monotonic.time()
        command['lsid'] = self._server_session.session_id

        if not self._in_transaction:
            self._transaction.state = _TxnState.NONE

        if is_retryable:
            self._server_session._transaction_id += 1
            command['txnNumber'] = self._server_session.transaction_id
            return

        if self._in_transaction:
            if read_preference != ReadPreference.PRIMARY:
                raise InvalidOperation(
                    'read preference in a transaction must be primary, not: '
                    '%r' % (read_preference, ))

            if self._transaction.state == _TxnState.STARTING:
                # First command begins a new transaction.
                self._transaction.state = _TxnState.IN_PROGRESS
                command['startTransaction'] = True

                if self._transaction.opts.read_concern:
                    rc = self._transaction.opts.read_concern.document
                else:
                    rc = {}

                if (self.options.causal_consistency
                        and self.operation_time is not None):
                    rc['afterClusterTime'] = self.operation_time

                if rc:
                    command['readConcern'] = rc

            command['txnNumber'] = self._server_session.transaction_id
            command['autocommit'] = False
Example #4
    def address(self):
        """(host, port) of the current standalone, primary, or mongos, or None.

        Accessing :attr:`address` raises :exc:`~.errors.InvalidOperation` if
        the client is load-balancing among mongoses, since there is no single
        address. Use :attr:`nodes` instead.

        If the client is not connected, this will block until a connection is
        established or raise ServerSelectionTimeoutError if no server is
        available.

        .. versionadded:: 3.0
        """
        topology_type = self._topology._description.topology_type
        if topology_type == TOPOLOGY_TYPE.Sharded:
            raise InvalidOperation(
                'Cannot use "address" property when load balancing among'
                ' mongoses, use "nodes" instead.')
        if topology_type not in (TOPOLOGY_TYPE.ReplicaSetWithPrimary,
                                 TOPOLOGY_TYPE.Single):
            return None
        return self._server_property('address')
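A short usage sketch of the property above; the URI is illustrative and the branch taken depends on the topology the client discovers:

from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient()
try:
    print(client.address)        # (host, port) of the standalone or primary
except InvalidOperation:
    # Raised when load-balancing among several mongoses: there is no single
    # address, so fall back to the full set of known nodes.
    print(sorted(client.nodes))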
Example #5
    def _finish_transaction(self, command_name):
        self._check_ended()

        if not self._in_transaction_or_auto_start():
            raise InvalidOperation("No transaction started")

        try:
            if self._server_session.statement_id == 0:
                # Not really started.
                self._server_session._transaction_id += 1
                return

            # TODO: retryable. And it's weird to pass parse_write_concern_error
            # from outside database.py.
            self._client.admin.command(
                command_name,
                session=self,
                write_concern=self._transaction.opts.write_concern,
                parse_write_concern_error=True)
        finally:
            self._server_session.reset_transaction()
            self._transaction = None
Example #6
    def _finish_transaction(self, command_name):
        self._check_ended()

        if not self._in_transaction_or_auto_start():
            raise InvalidOperation("No transaction started")

        try:
            if not self._transaction.sent_command:
                # Not really started.
                return

            # TODO: commitTransaction should be a retryable write.
            # Use _command directly because commit/abort are writes and must
            # always go to the primary.
            with self._client._socket_for_writes() as sock_info:
                return self._client.admin._command(
                    sock_info,
                    command_name,
                    session=self,
                    write_concern=self._transaction.opts.write_concern,
                    parse_write_concern_error=True)
        finally:
            self._transaction = None
Example #7
  async def get(self, **kwargs):
    if not self.table:
      raise InvalidOperation("No table")

    query = kwargs.pop("query", kwargs)
    sort = kwargs.pop("sort", None)
    many = kwargs.pop("many", False)
    limit = kwargs.pop("limit", None)

    if many:
      if sort:
        data = await self.table.find(query).sort(sort).to_list(limit)
      else:
        data = await self.table.find(query).to_list(limit)
    else:
      if sort:
        docs = await self.table.find(query).sort(sort).to_list(1)
        data = docs[0] if docs else None
      else:
        data = await self.table.find_one(query)

    # data = await self.table.find(query).to_list(limit) if many else await self.table.find_one(query)
    if not data:
      raise NotFound(query)

    self.load(data, many)
Example #8
    def execute_op_msg_no_results(self, sock_info, generator):
        """Execute write commands with OP_MSG and w=0 writeConcern, unordered.
        """
        db_name = self.collection.database.name
        client = self.collection.database.client
        listeners = client._event_listeners
        op_id = _randint()

        if not self.current_run:
            self.current_run = next(generator)
        run = self.current_run

        while run:
            cmd = SON([(_COMMANDS[run.op_type], self.collection.name),
                       ('ordered', False), ('writeConcern', {
                           'w': 0
                       })])
            bwc = _BulkWriteContext(db_name, cmd, sock_info, op_id, listeners,
                                    None)

            while run.idx_offset < len(run.ops):
                check_keys = run.op_type == _INSERT
                ops = islice(run.ops, run.idx_offset, None)
                # Run as many ops as possible.
                request_id, msg, to_send = _do_bulk_write_command(
                    self.namespace, run.op_type, cmd, ops, check_keys,
                    self.collection.codec_options, bwc)
                if not to_send:
                    raise InvalidOperation("cannot do an empty bulk write")
                run.idx_offset += len(to_send)
                # Though this isn't strictly a "legacy" write, the helper
                # handles publishing commands and sending our message
                # without receiving a result. Send 0 for max_doc_size
                # to disable size checking. Size checking is handled while
                # the documents are encoded to BSON.
                bwc.legacy_write(request_id, msg, 0, False, to_send)
            self.current_run = run = next(generator, None)
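At the public API level this code path is reached by an unordered bulk write against a collection configured with w=0 (unacknowledged). A rough sketch, with illustrative collection and documents:

from pymongo import InsertOne, MongoClient, UpdateOne
from pymongo.write_concern import WriteConcern

coll = MongoClient().test.events.with_options(write_concern=WriteConcern(w=0))
coll.bulk_write(
    [InsertOne({"type": "click"}),
     UpdateOne({"type": "view"}, {"$inc": {"count": 1}})],
    ordered=False)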
Example #9
def insert(collection_name,
           docs,
           check_keys,
           safe,
           last_error_args,
           continue_on_error,
           uuid_subtype,
           codec_options=None):
    """Get an **insert** message.

    .. note:: As of PyMongo 2.6, this function is no longer used. It
       is being kept (with tests) for backwards compatibility with 3rd
       party libraries that may currently be using it, but will likely
       be removed in a future release.

    """
    if codec_options is not None:
        uuid_subtype = codec_options.uuid_representation
    options = 0
    if continue_on_error:
        options += 1
    data = struct.pack("<i", options)
    data += bson._make_c_string(collection_name)
    encoded = [bson.BSON.encode(doc, check_keys, uuid_subtype) for doc in docs]
    if not encoded:
        raise InvalidOperation("cannot do an empty bulk insert")
    max_bson_size = max(list(map(len, encoded)))
    data += _EMPTY.join(encoded)
    if safe:
        (_, insert_message) = __pack_message(2002, data)
        (request_id, error_message, _) = __last_error(collection_name,
                                                      last_error_args)
        return (request_id, insert_message + error_message, max_bson_size)
    else:
        (request_id, insert_message) = __pack_message(2002, data)
        return (request_id, insert_message, max_bson_size)
Example #10
    def start_transaction(self,
                          read_concern=None,
                          write_concern=None,
                          read_preference=None,
                          max_commit_time_ms=None):
        """Start a multi-statement transaction.

        Takes the same arguments as :class:`TransactionOptions`.

        .. versionchanged:: 3.9
           Added the ``max_commit_time_ms`` option.

        .. versionadded:: 3.7
        """
        self._check_ended()

        if self.in_transaction:
            raise InvalidOperation("Transaction already in progress")

        read_concern = self._inherit_option("read_concern", read_concern)
        write_concern = self._inherit_option("write_concern", write_concern)
        read_preference = self._inherit_option("read_preference",
                                               read_preference)
        if max_commit_time_ms is None:
            opts = self.options.default_transaction_options
            if opts:
                max_commit_time_ms = opts.max_commit_time_ms

        self._transaction.opts = TransactionOptions(read_concern,
                                                    write_concern,
                                                    read_preference,
                                                    max_commit_time_ms)
        self._transaction.reset()
        self._transaction.state = _TxnState.STARTING
        self._start_retryable_write()
        return _TransactionContext(self)
Example #11
    def start_transaction(self,
                          read_concern=None,
                          write_concern=None,
                          read_preference=None):
        """Start a multi-statement transaction.

        Takes the same arguments as :class:`TransactionOptions`.

        .. versionadded:: 3.7
        """
        self._check_ended()

        if self._in_transaction:
            raise InvalidOperation("Transaction already in progress")

        read_concern = self._inherit_option("read_concern", read_concern)
        write_concern = self._inherit_option("write_concern", write_concern)
        read_preference = self._inherit_option("read_preference",
                                               read_preference)

        self._transaction = _Transaction(
            TransactionOptions(read_concern, write_concern, read_preference))
        self._server_session._transaction_id += 1
        return _TransactionContext(self)
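The start_transaction variants above are normally driven through session context managers. A minimal sketch, assuming a replica set (transactions require one) and illustrative names:

from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient()
coll = client.test.orders

with client.start_session() as session:
    with session.start_transaction():
        coll.insert_one({"sku": "abc"}, session=session)
        try:
            # A second start_transaction on the same session is rejected.
            session.start_transaction()
        except InvalidOperation as exc:
            print(exc)  # "Transaction already in progress"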
Example #12
    def _ensure_opened(self):
        """Start monitors, or restart after a fork.

        Hold the lock when calling this.
        """
        if self._closed:
            raise InvalidOperation("Cannot use MongoClient after close")

        if not self._opened:
            self._opened = True
            self._update_servers()

            # Start or restart the events publishing thread.
            if self._publish_tp or self._publish_server:
                self.__events_executor.open()

            # Start the SRV polling thread.
            if self._srv_monitor and (self.description.topology_type
                                      in SRV_POLLING_TOPOLOGIES):
                self._srv_monitor.open()

            if self._settings.load_balanced:
                # Emit initial SDAM events for load balancer mode.
                self._process_change(
                    ServerDescription(
                        self._seed_addresses[0],
                        Hello({
                            "ok": 1,
                            "serviceId": self._topology_id,
                            "maxWireVersion": 13
                        }),
                    ))

        # Ensure that the monitors are open.
        for server in self._servers.values():
            server.open()
Example #13
    def start_transaction(self, read_concern=None, write_concern=None,
                          read_preference=None):
        """Start a multi-statement transaction.

        Takes the same arguments as :class:`TransactionOptions`.

        .. versionadded:: 3.7
        """
        self._check_ended()

        if self._in_transaction:
            raise InvalidOperation("Transaction already in progress")

        read_concern = self._inherit_option("read_concern", read_concern)
        write_concern = self._inherit_option("write_concern", write_concern)
        read_preference = self._inherit_option(
            "read_preference", read_preference)

        self._transaction.opts = TransactionOptions(
            read_concern, write_concern, read_preference)
        self._transaction.reset()
        self._transaction.state = _TxnState.STARTING
        self._start_retryable_write()
        return _TransactionContext(self)
Example #14
def _do_batched_insert(collection_name, docs, check_keys, safe,
                       last_error_args, continue_on_error, opts, ctx):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(
                collection_name, last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(bson._make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    to_send = []
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, opts)
        encoded_length = len(encoded)
        too_large = (encoded_length > ctx.max_bson_size)

        message_length += encoded_length
        if message_length < ctx.max_message_size and not too_large:
            data.write(encoded)
            to_send.append(doc)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                request_id, msg = _insert_message(data.getvalue(), send_safe)
                ctx.legacy_write(request_id, msg, 0, send_safe, to_send)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the final batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            _raise_document_too_large("insert", encoded_length,
                                      ctx.max_bson_size)

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)
        to_send = [doc]

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    request_id, msg = _insert_message(data.getvalue(), safe)
    ctx.legacy_write(request_id, msg, 0, safe, to_send)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
Example #15
    def _check_ended(self):
        if self._server_session is None:
            raise InvalidOperation("Cannot use ended session")
Example #16
def _copy_database(fromdb, todb, fromhost, mechanism, username, password,
                   sock_info, cmd_func):
    """Copy a database, perhaps from a remote host.

    :Parameters:
      - `fromdb`: Source database.
      - `todb`: Target database.
      - `fromhost`: Source host like 'foo.com', 'foo.com:27017', or None.
      - `mechanism`: An authentication mechanism.
      - `username`: A str or unicode, or None.
      - `password`: A str or unicode, or None.
      - `sock_info`: A SocketInfo instance.
      - `cmd_func`: A callback taking args sock_info, database, command doc.
    """
    if not isinstance(fromdb, str):
        raise TypeError('from_name must be an instance '
                        'of %s' % (str.__name__, ))
    if not isinstance(todb, str):
        raise TypeError('to_name must be an instance '
                        'of %s' % (str.__name__, ))

    _check_database_name(todb)

    warnings.warn(
        "copy_database is deprecated. Use the raw 'copydb' command"
        " or db.copyDatabase() in the mongo shell. See"
        " doc/examples/copydb.",
        DeprecationWarning,
        stacklevel=2)

    # It would be better if the user told us what mechanism to use, but for
    # backwards compatibility with earlier PyMongos we don't require the
    # mechanism. Hope 'fromhost' runs the same version as the target.
    if mechanism == 'DEFAULT':
        if sock_info.max_wire_version >= 3:
            mechanism = 'SCRAM-SHA-1'
        else:
            mechanism = 'MONGODB-CR'

    if username is not None:
        if mechanism == 'SCRAM-SHA-1':
            credentials = auth._build_credentials_tuple(mech=mechanism,
                                                        source='admin',
                                                        user=username,
                                                        passwd=password,
                                                        extra=None)

            try:
                auth._copydb_scram_sha1(credentials=credentials,
                                        sock_info=sock_info,
                                        cmd_func=cmd_func,
                                        fromdb=fromdb,
                                        todb=todb,
                                        fromhost=fromhost)
            except OperationFailure as exc:
                errmsg = exc.details and exc.details.get('errmsg') or ''
                if 'no such cmd: saslStart' in errmsg:
                    explanation = ("%s doesn't support SCRAM-SHA-1, pass"
                                   " mechanism='MONGODB-CR' to copy_database" %
                                   fromhost)

                    raise OperationFailure(explanation, exc.code, exc.details)
                else:
                    raise

        elif mechanism == 'MONGODB-CR':
            get_nonce_cmd = SON([('copydbgetnonce', 1),
                                 ('fromhost', fromhost)])

            get_nonce_response, _ = cmd_func(sock_info, 'admin', get_nonce_cmd)
            nonce = get_nonce_response['nonce']
            copydb_cmd = SON([('copydb', 1), ('fromdb', fromdb),
                              ('todb', todb)])

            copydb_cmd['username'] = username
            copydb_cmd['nonce'] = nonce
            copydb_cmd['key'] = auth._auth_key(nonce, username, password)
            if fromhost is not None:
                copydb_cmd['fromhost'] = fromhost

            cmd_func(sock_info, 'admin', copydb_cmd)
        else:
            raise InvalidOperation('Authentication mechanism %r not supported'
                                   ' for copy_database' % mechanism)
    else:
        # No username.
        copydb_cmd = SON([('copydb', 1), ('fromdb', fromdb), ('todb', todb)])

        if fromhost:
            copydb_cmd['fromhost'] = fromhost

        cmd_func(sock_info, 'admin', copydb_cmd)
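As the deprecation warning above suggests, the replacement is the server's raw copydb command (itself removed in MongoDB 4.2). A hedged sketch of issuing it directly, with illustrative database names:

from bson.son import SON
from pymongo import MongoClient

client = MongoClient()  # assumes a server old enough to still ship copydb
client.admin.command(SON([("copydb", 1),
                          ("fromdb", "source_db"),
                          ("todb", "target_db")]))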
Example #17
    def __send_message(self, operation):
        """Send a query or getmore operation and handles the response.

        If operation is ``None`` this is an exhaust cursor, which reads
        the next result batch off the exhaust socket instead of
        sending getMore messages to the server.

        Can raise ConnectionFailure.
        """
        client = self.__collection.database.client
        # OP_MSG is required to support exhaust cursors with encryption.
        if client._encrypter and self.__exhaust:
            raise InvalidOperation(
                "exhaust cursors do not support auto encryption")

        try:
            response = client._run_operation_with_response(
                operation,
                self._unpack_response,
                exhaust=self.__exhaust,
                address=self.__address)
        except OperationFailure:
            self.__killed = True

            # Make sure exhaust socket is returned immediately, if necessary.
            self.__die()

            # If this is a tailable cursor the error is likely
            # due to capped collection roll over. Setting
            # self.__killed to True ensures Cursor.alive will be
            # False. No need to re-raise.
            if self.__query_flags & _QUERY_OPTIONS["tailable_cursor"]:
                return
            raise
        except NotMasterError:
            # Don't send kill cursors to another server after a "not master"
            # error. It's completely pointless.
            self.__killed = True

            # Make sure exhaust socket is returned immediately, if necessary.
            self.__die()

            raise
        except ConnectionFailure:
            # Don't try to send kill cursors on another socket
            # or to another server. It can cause a _pinValue
            # assertion on some server releases if we get here
            # due to a socket timeout.
            self.__killed = True
            self.__die()
            raise
        except Exception:
            # Close the cursor
            self.__die()
            raise

        self.__address = response.address
        if self.__exhaust and not self.__exhaust_mgr:
            # 'response' is an ExhaustResponse.
            self.__exhaust_mgr = _SocketManager(response.socket_info,
                                                response.pool)

        cmd_name = operation.name
        docs = response.docs
        if response.from_command:
            if cmd_name != "explain":
                cursor = docs[0]['cursor']
                self.__id = cursor['id']
                if cmd_name == 'find':
                    documents = cursor['firstBatch']
                    # Update the namespace used for future getMore commands.
                    ns = cursor.get('ns')
                    if ns:
                        self.__dbname, self.__collname = ns.split('.', 1)
                else:
                    documents = cursor['nextBatch']
                self.__data = deque(documents)
                self.__retrieved += len(documents)
            else:
                self.__id = 0
                self.__data = deque(docs)
                self.__retrieved += len(docs)
        else:
            self.__id = response.data.cursor_id
            self.__data = deque(docs)
            self.__retrieved += response.data.number_returned

        if self.__id == 0:
            self.__killed = True
            # Don't wait for garbage collection to call __del__, return the
            # socket and the session to the pool now.
            self.__die()

        if self.__limit and self.__id and self.__limit <= self.__retrieved:
            self.__die()
Example #18
    def __getitem__(self, index):
        raise InvalidOperation("Cannot call __getitem__ on RawBatchCursor")
Example #19
    def _check_closed(self):
        if self._encryption is None:
            raise InvalidOperation("Cannot use closed ClientEncryption")
Example #20
    def __check_okay_to_chain(self):
        """Check if it is okay to chain more options onto this cursor.
        """
        if self.__retrieved or self.__id is not None:
            raise InvalidOperation("cannot set options after executing query")
Example #21
    def _execute_command(self, generator, write_concern, session, sock_info,
                         op_id, retryable, full_result):
        if sock_info.max_wire_version < 5 and self.uses_collation:
            raise ConfigurationError(
                'Must be connected to MongoDB 3.4+ to use a collation.')
        if sock_info.max_wire_version < 6 and self.uses_array_filters:
            raise ConfigurationError(
                'Must be connected to MongoDB 3.6+ to use arrayFilters.')

        db_name = self.collection.database.name
        client = self.collection.database.client
        listeners = client._event_listeners

        if not self.current_run:
            self.current_run = next(generator)
        run = self.current_run

        # sock_info.command validates the session, but we use
        # sock_info.write_command.
        sock_info.validate_session(client, session)
        while run:
            cmd = SON([(_COMMANDS[run.op_type], self.collection.name),
                       ('ordered', self.ordered)])
            if not write_concern.is_server_default:
                cmd['writeConcern'] = write_concern.document
            if self.bypass_doc_val and sock_info.max_wire_version >= 4:
                cmd['bypassDocumentValidation'] = True
            bwc = _BulkWriteContext(db_name, cmd, sock_info, op_id, listeners,
                                    session)

            while run.idx_offset < len(run.ops):
                if session:
                    session._apply_to(cmd, retryable, ReadPreference.PRIMARY)
                sock_info.send_cluster_time(cmd, session, client)
                check_keys = run.op_type == _INSERT
                ops = islice(run.ops, run.idx_offset, None)
                # Run as many ops as possible.
                request_id, msg, to_send = _do_bulk_write_command(
                    self.namespace, run.op_type, cmd, ops, check_keys,
                    self.collection.codec_options, bwc)
                if not to_send:
                    raise InvalidOperation("cannot do an empty bulk write")
                result = bwc.write_command(request_id, msg, to_send)
                client._receive_cluster_time(result, session)

                # Retryable writeConcernErrors halt the execution of this run.
                wce = result.get('writeConcernError', {})
                if wce.get('code', 0) in _RETRYABLE_ERROR_CODES:
                    # Synthesize the full bulk result without modifying the
                    # current one because this write operation may be retried.
                    full = copy.deepcopy(full_result)
                    _merge_command(run, full, run.idx_offset, result)
                    _raise_bulk_write_error(full)

                _merge_command(run, full_result, run.idx_offset, result)
                # We're no longer in a retry once a command succeeds.
                self.retrying = False
                if self.ordered and "writeErrors" in result:
                    break
                run.idx_offset += len(to_send)

            # We're supposed to continue if errors are
            # at the write concern level (e.g. wtimeout)
            if self.ordered and full_result['writeErrors']:
                break
            # Reset our state
            self.current_run = run = next(generator, None)
Example #22
    def __init__(self,
                 collection,
                 spec=None,
                 fields=None,
                 skip=0,
                 limit=0,
                 timeout=True,
                 snapshot=False,
                 tailable=False,
                 sort=None,
                 max_scan=None,
                 as_class=None,
                 slave_okay=False,
                 await_data=False,
                 partial=False,
                 manipulate=True,
                 read_preference=ReadPreference.PRIMARY,
                 tag_sets=[{}],
                 secondary_acceptable_latency_ms=None,
                 exhaust=False,
                 compile_re=True,
                 oplog_replay=False,
                 modifiers=None,
                 _must_use_master=False,
                 _codec_options=None,
                 **kwargs):
        """Create a new cursor.

        Should not be called directly by application developers - see
        :meth:`~pymongo.collection.Collection.find` instead.

        .. mongodoc:: cursors
        """

        # Backport aliases.
        if 'filter' in kwargs:
            spec = kwargs['filter']
        if 'projection' in kwargs:
            fields = kwargs['projection']
        if 'no_cursor_timeout' in kwargs:
            timeout = not kwargs['no_cursor_timeout']
        if 'allow_partial_results' in kwargs:
            partial = kwargs['allow_partial_results']

        if 'cursor_type' in kwargs:
            crt = kwargs['cursor_type']
            if crt not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                           CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
                raise ValueError("not a valid value for cursor_type")
            exhaust = crt == CursorType.EXHAUST
            tailable = crt == CursorType.TAILABLE
            if crt == CursorType.TAILABLE_AWAIT:
                await_data = True
                tailable = True

        if modifiers is not None:
            if not isinstance(modifiers, dict):
                raise TypeError("%s must be an instance of dict or subclass" %
                                (modifiers, ))
            if '$snapshot' in modifiers:
                snapshot = modifiers['$snapshot']
            if '$maxScan' in modifiers:
                max_scan = modifiers['$maxScan']

        self.__id = None

        if spec is None:
            spec = {}

        if not isinstance(spec, dict):
            raise TypeError("spec must be an instance of dict")
        if not isinstance(skip, int):
            raise TypeError("skip must be an instance of int")
        if not isinstance(limit, int):
            raise TypeError("limit must be an instance of int")
        if not isinstance(timeout, bool):
            raise TypeError("timeout must be an instance of bool")
        if not isinstance(snapshot, bool):
            raise TypeError("snapshot must be an instance of bool")
        if not isinstance(tailable, bool):
            raise TypeError("tailable must be an instance of bool")
        if not isinstance(slave_okay, bool):
            raise TypeError("slave_okay must be an instance of bool")
        if not isinstance(await_data, bool):
            raise TypeError("await_data must be an instance of bool")
        if not isinstance(partial, bool):
            raise TypeError("partial must be an instance of bool")
        if not isinstance(exhaust, bool):
            raise TypeError("exhaust must be an instance of bool")
        if not isinstance(oplog_replay, bool):
            raise TypeError("oplog_replay must be an instance of bool")

        if fields is not None:
            if not fields:
                fields = {"_id": 1}
            if not isinstance(fields, dict):
                fields = helpers._fields_list_to_dict(fields)

        self.__collection = collection
        self.__spec = spec
        self.__fields = fields
        self.__skip = skip
        self.__limit = limit
        self.__max_time_ms = None
        self.__batch_size = 0
        self.__max = None
        self.__min = None
        self.__modifiers = modifiers and modifiers.copy() or {}

        # Exhaust cursor support
        if self.__collection.database.connection.is_mongos and exhaust:
            raise InvalidOperation('Exhaust cursors are '
                                   'not supported by mongos')
        if limit and exhaust:
            raise InvalidOperation("Can't use limit and exhaust together.")
        self.__exhaust = exhaust
        self.__exhaust_mgr = None

        # This is ugly. People want to be able to do cursor[5:5] and
        # get an empty result set (old behavior was an
        # exception). It's hard to do that right, though, because the
        # server uses limit(0) to mean 'no limit'. So we set __empty
        # in that case and check for it when iterating. We also unset
        # it anytime we change __limit.
        self.__empty = False

        self.__snapshot = snapshot
        self.__ordering = sort and helpers._index_document(sort) or None
        self.__max_scan = max_scan
        self.__explain = False
        self.__hint = None
        self.__comment = None
        self.__slave_okay = slave_okay
        self.__manipulate = manipulate
        self.__read_preference = read_preference
        self.__tag_sets = tag_sets
        self.__secondary_acceptable_latency_ms = secondary_acceptable_latency_ms
        self.__compile_re = compile_re
        self.__must_use_master = _must_use_master

        copts = _codec_options or collection.codec_options
        if as_class is not None:
            copts = _CodecOptions(as_class, copts.tz_aware,
                                  copts.uuid_representation)
        self.__codec_options = copts

        self.__data = deque()
        self.__connection_id = None
        self.__retrieved = 0
        self.__killed = False

        self.__query_flags = 0
        if tailable:
            self.__query_flags |= _QUERY_OPTIONS["tailable_cursor"]
        if not timeout:
            self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
        if tailable and await_data:
            self.__query_flags |= _QUERY_OPTIONS["await_data"]
        if exhaust:
            self.__query_flags |= _QUERY_OPTIONS["exhaust"]
        if partial:
            self.__query_flags |= _QUERY_OPTIONS["partial"]
        if oplog_replay:
            self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]

        # this is for passing network_timeout through if it's specified
        # need to use kwargs as None is a legit value for network_timeout
        self.__kwargs = kwargs
Example #23
    def try_next(self):
        """Advance the cursor without blocking indefinitely.

        This method returns the next change document without waiting
        indefinitely for the next change. For example::

            with db.collection.watch() as stream:
                while stream.alive:
                    change = stream.try_next()
                    # Note that the ChangeStream's resume token may be updated
                    # even when no changes are returned.
                    print("Current resume token: %r" % (stream.resume_token,))
                    if change is not None:
                        print("Change document: %r" % (change,))
                        continue
                    # We end up here when there are no recent changes.
                    # Sleep for a while before trying again to avoid flooding
                    # the server with getMore requests when no changes are
                    # available.
                    time.sleep(10)

        If no change document is cached locally then this method runs a single
        getMore command. If the getMore yields any documents, the next
        document is returned, otherwise, if the getMore returns no documents
        (because there have been no changes) then ``None`` is returned.

        :Returns:
          The next change document or ``None`` when no document is available
          after running a single getMore or when the cursor is closed.

        .. versionadded:: 3.8
        """
        # Attempt to get the next change with at most one getMore and at most
        # one resume attempt.
        try:
            change = self._cursor._try_next(True)
        except ConnectionFailure:
            self._resume()
            change = self._cursor._try_next(False)
        except OperationFailure as exc:
            if (exc.code in _NON_RESUMABLE_GETMORE_ERRORS
                    or exc.has_error_label("NonResumableChangeStreamError")):
                raise
            self._resume()
            change = self._cursor._try_next(False)

        # If no changes are available.
        if change is None:
            # We have either iterated over all documents in the cursor,
            # OR the most-recently returned batch is empty. In either case,
            # update the cached resume token with the postBatchResumeToken if
            # one was returned. We also clear the startAtOperationTime.
            if self._cursor._post_batch_resume_token is not None:
                self._resume_token = self._cursor._post_batch_resume_token
                self._start_at_operation_time = None
            return change

        # Else, changes are available.
        try:
            resume_token = change['_id']
        except KeyError:
            self.close()
            raise InvalidOperation(
                "Cannot provide resume functionality when the resume "
                "token is missing.")

        # If this is the last change document from the current batch, cache the
        # postBatchResumeToken.
        if (not self._cursor._has_next()
                and self._cursor._post_batch_resume_token):
            resume_token = self._cursor._post_batch_resume_token

        # Hereafter, don't use startAfter; instead use resumeAfter.
        self._uses_start_after = False
        self._uses_resume_after = True

        # Cache the resume token and clear startAtOperationTime.
        self._resume_token = resume_token
        self._start_at_operation_time = None

        if self._decode_custom:
            return _bson_to_dict(change.raw, self._orig_codec_options)
        return change
Example #24
def _do_batched_write_command(namespace, operation, command, docs, check_keys,
                              opts, ctx):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    to_send = []

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = _randint()
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))
        return ctx.write_command(request_id, buf.getvalue(), to_send)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Send a batch?
        enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(write_op, len(value), max_bson_size)
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b'0'
            to_send = []
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
Example #25
def _disallow_transactions(session):
    if session and session.in_transaction:
        raise InvalidOperation(
            'GridFS does not support multi-document transactions')
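A sketch of the case this helper rejects: GridFS operations passed a session that is currently inside a transaction. It assumes a replica set; bucket and file names are illustrative:

import gridfs
from pymongo import MongoClient
from pymongo.errors import InvalidOperation

client = MongoClient()
fs = gridfs.GridFS(client.test)

with client.start_session() as session:
    with session.start_transaction():
        try:
            fs.put(b"payload", filename="report.bin", session=session)
        except InvalidOperation as exc:
            print(exc)  # "GridFS does not support multi-document transactions"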
Example #26
def _do_batched_write_command(namespace, operation, command, docs, check_keys,
                              opts, ctx):
    """Create the next batched insert, update, or delete command.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4
    to_send = []
    idx = 0
    for doc in docs:
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Is there enough room to add this document? max_cmd_size accounts for
        # the two trailing null bytes.
        enough_data = (buf.tell() + len(key) + len(value)) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(write_op, len(value), max_bson_size)
            break
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    # Finalize the current OP_QUERY message.
    # Close list and command documents
    buf.write(_ZERO_16)

    # Write document lengths and request id
    length = buf.tell()
    buf.seek(list_start)
    buf.write(struct.pack('<i', length - list_start - 1))
    buf.seek(command_start)
    buf.write(struct.pack('<i', length - command_start))
    buf.seek(4)
    request_id = _randint()
    buf.write(struct.pack('<i', request_id))
    buf.seek(0)
    buf.write(struct.pack('<i', length))

    return request_id, buf.getvalue(), to_send
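The split rule above, stop adding documents once either the byte budget or the per-batch document count would be exceeded, can be shown in isolation. A hypothetical stand-alone sketch with made-up limits:

def split_batches(encoded_docs, max_cmd_size, max_write_batch_size):
    # Hypothetical illustration of the batching rule used above: start a new
    # batch when the next document would blow the byte budget, or when the
    # per-batch document count has been reached.
    batch, size = [], 0
    for doc in encoded_docs:
        if batch and (size + len(doc) >= max_cmd_size
                      or len(batch) >= max_write_batch_size):
            yield batch
            batch, size = [], 0
        batch.append(doc)
        size += len(doc)
    if batch:
        yield batch

# Three 40-byte documents against a 100-byte budget split into two batches.
print([len(b) for b in split_batches([b"x" * 40] * 3, 100, 1000)])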
Example #27
    def check_session_auth_matches(self, session):
        """Raise error if a ClientSession is logged in as a different user."""
        if session and session._authset != self.authset:
            raise InvalidOperation('session was used after authenticating'
                                   ' with different credentials')
Example #28
    def __init__(self,
                 collection,
                 filter=None,
                 projection=None,
                 skip=0,
                 limit=0,
                 no_cursor_timeout=False,
                 cursor_type=CursorType.NON_TAILABLE,
                 sort=None,
                 allow_partial_results=False,
                 oplog_replay=False,
                 modifiers=None,
                 batch_size=0,
                 collation=None,
                 hint=None,
                 max_scan=None,
                 max_time_ms=None,
                 max=None,
                 min=None,
                 return_key=False,
                 show_record_id=False,
                 snapshot=False,
                 comment=None,
                 session=None,
                 allow_disk_use=None):
        """Create a new cursor.

        Should not be called directly by application developers - see
        :meth:`~pymongo.collection.Collection.find` instead.

        .. mongodoc:: cursors
        """
        # Initialize all attributes used in __del__ before possibly raising
        # an error to avoid attribute errors during garbage collection.
        self.__id = None
        self.__exhaust = False
        self.__exhaust_mgr = None
        self.__killed = False

        if session:
            self.__session = session
            self.__explicit_session = True
        else:
            self.__session = None
            self.__explicit_session = False

        spec = filter
        if spec is None:
            spec = {}

        validate_is_mapping("filter", spec)
        if not isinstance(skip, int):
            raise TypeError("skip must be an instance of int")
        if not isinstance(limit, int):
            raise TypeError("limit must be an instance of int")
        validate_boolean("no_cursor_timeout", no_cursor_timeout)
        if no_cursor_timeout and not self.__explicit_session:
            warnings.warn(
                "use an explicit session with no_cursor_timeout=True "
                "otherwise the cursor may still timeout after "
                "30 minutes, for more info see "
                "https://docs.mongodb.com/v4.4/reference/method/"
                "cursor.noCursorTimeout/"
                "#session-idle-timeout-overrides-nocursortimeout",
                UserWarning,
                stacklevel=2)
        if cursor_type not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                               CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
            raise ValueError("not a valid value for cursor_type")
        validate_boolean("allow_partial_results", allow_partial_results)
        validate_boolean("oplog_replay", oplog_replay)
        if modifiers is not None:
            warnings.warn("the 'modifiers' parameter is deprecated",
                          DeprecationWarning,
                          stacklevel=2)
            validate_is_mapping("modifiers", modifiers)
        if not isinstance(batch_size, int):
            raise TypeError("batch_size must be an integer")
        if batch_size < 0:
            raise ValueError("batch_size must be >= 0")
        # Only set if allow_disk_use is provided by the user, else None.
        if allow_disk_use is not None:
            allow_disk_use = validate_boolean("allow_disk_use", allow_disk_use)

        if projection is not None:
            if not projection:
                projection = {"_id": 1}
            projection = helpers._fields_list_to_dict(projection, "projection")

        self.__collection = collection
        self.__spec = spec
        self.__projection = projection
        self.__skip = skip
        self.__limit = limit
        self.__batch_size = batch_size
        self.__modifiers = modifiers and modifiers.copy() or {}
        self.__ordering = sort and helpers._index_document(sort) or None
        self.__max_scan = max_scan
        self.__explain = False
        self.__comment = comment
        self.__max_time_ms = max_time_ms
        self.__max_await_time_ms = None
        self.__max = max
        self.__min = min
        self.__collation = validate_collation_or_none(collation)
        self.__return_key = return_key
        self.__show_record_id = show_record_id
        self.__allow_disk_use = allow_disk_use
        self.__snapshot = snapshot
        self.__set_hint(hint)

        # Exhaust cursor support
        if cursor_type == CursorType.EXHAUST:
            if self.__collection.database.client.is_mongos:
                raise InvalidOperation('Exhaust cursors are '
                                       'not supported by mongos')
            if limit:
                raise InvalidOperation("Can't use limit and exhaust together.")
            self.__exhaust = True

        # This is ugly. People want to be able to do cursor[5:5] and
        # get an empty result set (old behavior was an
        # exception). It's hard to do that right, though, because the
        # server uses limit(0) to mean 'no limit'. So we set __empty
        # in that case and check for it when iterating. We also unset
        # it anytime we change __limit.
        self.__empty = False

        self.__data = deque()
        self.__address = None
        self.__retrieved = 0

        self.__codec_options = collection.codec_options
        # Read preference is set when the initial find is sent.
        self.__read_preference = None
        self.__read_concern = collection.read_concern

        self.__query_flags = cursor_type
        if no_cursor_timeout:
            self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
        if allow_partial_results:
            self.__query_flags |= _QUERY_OPTIONS["partial"]
        if oplog_replay:
            self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]

        # The namespace to use for find/getMore commands.
        self.__dbname = collection.database.name
        self.__collname = collection.name
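Seen from Collection.find(), the exhaust checks in this constructor reject, for example, a limit combined with CursorType.EXHAUST. A minimal sketch, assuming a reachable non-mongos deployment and an illustrative collection:

from pymongo import MongoClient
from pymongo.cursor import CursorType
from pymongo.errors import InvalidOperation

coll = MongoClient().test.logs
try:
    coll.find({}, cursor_type=CursorType.EXHAUST, limit=10)
except InvalidOperation as exc:
    print(exc)  # "Can't use limit and exhaust together."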
Example #29
    def __init__(self,
                 collection,
                 filter=None,
                 projection=None,
                 skip=0,
                 limit=0,
                 no_cursor_timeout=False,
                 cursor_type=CursorType.NON_TAILABLE,
                 sort=None,
                 allow_partial_results=False,
                 oplog_replay=False,
                 modifiers=None,
                 batch_size=0,
                 manipulate=True):
        """Create a new cursor.

        Should not be called directly by application developers - see
        :meth:`~pymongo.collection.Collection.find` instead.

        .. mongodoc:: cursors
        """
        self.__id = None

        spec = filter
        if spec is None:
            spec = {}

        validate_is_mapping("filter", spec)
        if not isinstance(skip, int):
            raise TypeError("skip must be an instance of int")
        if not isinstance(limit, int):
            raise TypeError("limit must be an instance of int")
        validate_boolean("no_cursor_timeout", no_cursor_timeout)
        if cursor_type not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                               CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
            raise ValueError("not a valid value for cursor_type")
        validate_boolean("allow_partial_results", allow_partial_results)
        validate_boolean("oplog_replay", oplog_replay)
        if modifiers is not None:
            validate_is_mapping("modifiers", modifiers)
        if not isinstance(batch_size, integer_types):
            raise TypeError("batch_size must be an integer")
        if batch_size < 0:
            raise ValueError("batch_size must be >= 0")

        if projection is not None:
            if not projection:
                projection = {"_id": 1}
            projection = helpers._fields_list_to_dict(projection, "projection")

        self.__collection = collection
        self.__spec = spec
        self.__projection = projection
        self.__skip = skip
        self.__limit = limit
        self.__batch_size = batch_size
        self.__modifiers = modifiers and modifiers.copy() or {}
        self.__ordering = sort and helpers._index_document(sort) or None
        self.__max_scan = None
        self.__explain = False
        self.__hint = None
        self.__comment = None
        self.__max_time_ms = None
        self.__max_await_time_ms = None
        self.__max = None
        self.__min = None
        self.__manipulate = manipulate

        # Exhaust cursor support
        self.__exhaust = False
        self.__exhaust_mgr = None
        if cursor_type == CursorType.EXHAUST:
            if self.__collection.database.client.is_mongos:
                raise InvalidOperation('Exhaust cursors are '
                                       'not supported by mongos')
            if limit:
                raise InvalidOperation("Can't use limit and exhaust together.")
            self.__exhaust = True

        # This is ugly. People want to be able to do cursor[5:5] and
        # get an empty result set (old behavior was an
        # exception). It's hard to do that right, though, because the
        # server uses limit(0) to mean 'no limit'. So we set __empty
        # in that case and check for it when iterating. We also unset
        # it anytime we change __limit.
        self.__empty = False

        self.__data = deque()
        self.__address = None
        self.__retrieved = 0
        self.__killed = False

        self.__codec_options = collection.codec_options
        self.__read_preference = collection.read_preference
        self.__read_concern = collection.read_concern

        self.__query_flags = cursor_type
        if self.__read_preference != ReadPreference.PRIMARY:
            self.__query_flags |= _QUERY_OPTIONS["slave_okay"]
        if no_cursor_timeout:
            self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
        if allow_partial_results:
            self.__query_flags |= _QUERY_OPTIONS["partial"]
        if oplog_replay:
            self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]
Example #30
    def _check_closed(self):
        if self._closed:
            raise InvalidOperation("Cannot use MongoClient after close")