    def read(self, size=-1):
        """Read at most `size` bytes from the file (less if there
        isn't enough data).

        The bytes are returned as an instance of :class:`str` (:class:`bytes`
        in python 3). If `size` is negative or omitted all data is read.

        :Parameters:
          - `size` (optional): the number of bytes to read
        """
        self._ensure_file()

        if size == 0:
            return EMPTY

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            received += len(chunk_data)
            data.write(chunk_data)

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
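
A minimal usage sketch (not part of the example above) of reading a GridFS file through GridOut.read(); it assumes a local mongod, the standard pymongo/gridfs APIs, and an arbitrary database name.

from pymongo import MongoClient
from gridfs import GridFS

db = MongoClient().gridfs_example   # assumes a local mongod; db name is arbitrary
fs = GridFS(db)
file_id = fs.put(b"hello gridfs", filename="greeting.txt")

grid_out = fs.get(file_id)
print(grid_out.read(5))    # b'hello'   - at most 5 bytes
print(grid_out.read())     # b' gridfs' - negative/omitted size reads the rest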
Example #2
    def readline(self, size=-1):
        """Read one line or up to `size` bytes from the file.

        :Parameters:
         - `size` (optional): the maximum number of bytes to read
        """
        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        if size == 0:
            return EMPTY

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            pos = chunk_data.find(NEWLN, 0, size)
            if pos != -1:
                size = received + pos + 1

            received += len(chunk_data)
            data.write(chunk_data)
            if pos != -1:
                break

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
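
A short readline() sketch under the same assumptions (it reuses the `fs` handle from the previous sketch); the byte counts in the comments follow the buffering logic shown above.

file_id = fs.put(b"first line\nsecond line\n", filename="lines.txt")
grid_out = fs.get(file_id)

print(grid_out.readline())     # b'first line\n'  - stops after the newline
print(grid_out.readline(3))    # b'sec'           - capped at 3 bytes
print(grid_out.readline())     # b'ond line\n'    - resumes from the buffer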
Example #3
    def read(self, size=-1):
        """Read at most `size` bytes from the file (less if there
        isn't enough data).

        The bytes are returned as an instance of :class:`str` (:class:`bytes`
        in python 3). If `size` is negative or omitted all data is read.

        :Parameters:
          - `size` (optional): the number of bytes to read
        """
        self._ensure_file()

        if size == 0:
            return EMPTY

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            received += len(chunk_data)
            data.write(chunk_data)

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
    def readline(self, size=-1):
        """Read one line or up to `size` bytes from the file.

        :Parameters:
         - `size` (optional): the maximum number of bytes to read

        .. versionadded:: 1.9
        """
        if size == 0:
            return b('')

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            pos = chunk_data.find(NEWLN, 0, size)
            if pos != -1:
                size = received + pos + 1

            received += len(chunk_data)
            data.write(chunk_data)
            if pos != -1:
                break

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
Example #5
    def read(self, size=-1):
        """Read at most `size` bytes from the file (less if there
        isn't enough data).

        The bytes are returned as an instance of :class:`str` (:class:`bytes`
        in python 3). If `size` is negative or omitted all data is read.

        :Parameters:
          - `size` (optional): the number of bytes to read
        """
        self._ensure_file()

        if size == 0:
            return EMPTY

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            received += len(chunk_data)
            data.write(chunk_data)

        # Detect extra chunks.
        max_chunk_n = math.ceil(self.length / float(self.chunk_size))
        chunk = self.__chunks.find_one(
            {
                "files_id": self._id,
                "n": {
                    "$gte": max_chunk_n
                }
            },
            session=self._session)
        # According to spec, ignore extra chunks if they are empty.
        if chunk is not None and len(chunk['data']):
            raise CorruptGridFile(
                "Extra chunk found: expected %i chunks but found "
                "chunk with n=%i" % (max_chunk_n, chunk['n']))

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
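
A hedged, test-style illustration of the extra-chunk check above: manually inserting a non-empty chunk past the expected range should make read() raise CorruptGridFile. It reuses the `db` and `fs` handles from the earlier sketches; the filename is arbitrary.

from bson import Binary
from gridfs.errors import CorruptGridFile

file_id = fs.put(b"tiny", filename="corrupt-me.txt")
# One real chunk (n=0) exists; n=1 with non-empty data is the "extra" chunk.
db.fs.chunks.insert_one(
    {"files_id": file_id, "n": 1, "data": Binary(b"unexpected")})

try:
    fs.get(file_id).read()
except CorruptGridFile as exc:
    print("detected:", exc)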
    def test_zip(self):
        zf = StringIO()
        z = zipfile.ZipFile(zf, "w")
        z.writestr("test.txt", b"hello world")
        z.close()
        zf.seek(0)

        f = GridIn(self.db.fs, filename="test.zip")
        f.write(zf)
        f.close()
        self.assertEqual(1, self.db.fs.files.count_documents({}))
        self.assertEqual(1, self.db.fs.chunks.count_documents({}))

        g = GridOut(self.db.fs, f._id)
        z = zipfile.ZipFile(g)
        self.assertSequenceEqual(z.namelist(), ["test.txt"])
        self.assertEqual(z.read("test.txt"), b"hello world")
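
A related stand-alone sketch, assuming only a local mongod: because GridOut implements read(), seek() and tell(), a stored archive can also be looked up by filename with GridFS.get_last_version() and handed straight to zipfile.

import io
import zipfile
import gridfs
from pymongo import MongoClient

db = MongoClient().gridfs_example        # assumes a local mongod
zf = io.BytesIO()
with zipfile.ZipFile(zf, "w") as z:
    z.writestr("test.txt", b"hello world")

fs = gridfs.GridFS(db)
fs.put(zf.getvalue(), filename="test.zip")

grid_out = fs.get_last_version("test.zip")
with zipfile.ZipFile(grid_out) as archive:
    print(archive.read("test.txt"))      # b'hello world'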
    def read(self, size=-1):
        """Read at most `size` bytes from the file (less if there
        isn't enough data).

        The bytes are returned as an instance of :class:`str` (:class:`bytes`
        in python 3). If `size` is negative or omitted all data is read.

        :Parameters:
          - `size` (optional): the number of bytes to read

        .. versionchanged:: 3.8
           This method now only checks for extra chunks after reading the
           entire file. Previously, this method would check for extra chunks
           on every call.
        """
        self._ensure_file()

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        if size == 0:
            return EMPTY

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            received += len(chunk_data)
            data.write(chunk_data)

        # Detect extra chunks after reading the entire file.
        if size == remainder and self.__chunk_iter:
            try:
                self.__chunk_iter.next()
            except StopIteration:
                pass

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
Example #8
    def read(self, size=-1):
        """Read at most `size` bytes from the file (less if there
        isn't enough data).

        The bytes are returned as an instance of :class:`str` (:class:`bytes`
        in python 3). If `size` is negative or omitted all data is read.

        :Parameters:
          - `size` (optional): the number of bytes to read

        .. versionchanged:: 3.8
           This method now only checks for extra chunks after reading the
           entire file. Previously, this method would check for extra chunks
           on every call.
        """
        self._ensure_file()

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        if size == 0:
            return EMPTY

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            received += len(chunk_data)
            data.write(chunk_data)

        # Detect extra chunks after reading the entire file.
        if size == remainder and self.__chunk_iter:
            try:
                self.__chunk_iter.next()
            except StopIteration:
                pass

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
    def read(self, size=-1):
        """Read at most `size` bytes from the file (less if there
        isn't enough data).

        The bytes are returned as an instance of :class:`str` (:class:`bytes`
        in python 3). If `size` is negative or omitted all data is read.

        :Parameters:
          - `size` (optional): the number of bytes to read
        """
        self._ensure_file()

        if size == 0:
            return EMPTY

        remainder = int(self.length) - self.__position
        if size < 0 or size > remainder:
            size = remainder

        received = 0
        data = StringIO()
        while received < size:
            chunk_data = self.readchunk()
            received += len(chunk_data)
            data.write(chunk_data)

        # Detect extra chunks.
        max_chunk_n = math.ceil(self.length / float(self.chunk_size))
        chunk = self.__chunks.find_one({"files_id": self._id,
                                        "n": {"$gte": max_chunk_n}},
                                       session=self._session)
        # According to spec, ignore extra chunks if they are empty.
        if chunk is not None and len(chunk['data']):
            raise CorruptGridFile(
                "Extra chunk found: expected %i chunks but found "
                "chunk with n=%i" % (max_chunk_n, chunk['n']))

        self.__position -= received - size

        # Return 'size' bytes and store the rest.
        data.seek(size)
        self.__buffer = data.read()
        data.seek(0)
        return data.read(size)
Example #10
def _do_batched_write_command(
        namespace, operation, command, docs, check_keys, opts, ctx):
    """Create the next batched insert, update, or delete command.
    """
    buf = StringIO()

    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")

    # Write OP_QUERY write command
    to_send, length = _batched_write_command(
        namespace, operation, command, docs, check_keys, opts, ctx, buf)

    # Header - request id and message length
    buf.seek(4)
    request_id = _randint()
    buf.write(struct.pack('<i', request_id))
    buf.seek(0)
    buf.write(struct.pack('<i', length))

    return request_id, buf.getvalue(), to_send
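
A small self-contained sketch of the wire framing this function fills in: every MongoDB message starts with four little-endian int32s (messageLength, requestID, responseTo, opCode), and the b"\xd4\x07\x00\x00" written above is opCode 2004 (OP_QUERY).

import struct

def parse_header(msg):
    # Every MongoDB wire message starts with four little-endian int32s.
    message_length, request_id, response_to, op_code = struct.unpack_from("<iiii", msg, 0)
    return message_length, request_id, response_to, op_code

# Toy 16-byte header with no body: length 16, request id 1234, responseTo 0, OP_QUERY.
header = struct.pack("<iiii", 16, 1234, 0, 2004)
print(parse_header(header))   # (16, 1234, 0, 2004)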
    def test_download_to_stream_by_name(self):
        file1 = StringIO(b"hello world")
        # Test with one chunk.
        oid = self.fs.upload_from_stream("one_chunk", file1)
        self.assertEqual(1, self.db.fs.chunks.count())
        file2 = StringIO()
        self.fs.download_to_stream_by_name("one_chunk", file2)
        file1.seek(0)
        file2.seek(0)
        self.assertEqual(file1.read(), file2.read())

        # Test with many chunks.
        self.db.drop_collection("fs.files")
        self.db.drop_collection("fs.chunks")
        file1.seek(0)
        self.fs.upload_from_stream("many_chunks", file1, chunk_size_bytes=1)
        self.assertEqual(11, self.db.fs.chunks.count())

        file2 = StringIO()
        self.fs.download_to_stream_by_name("many_chunks", file2)
        file1.seek(0)
        file2.seek(0)
        self.assertEqual(file1.read(), file2.read())
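
A hedged stand-alone sketch of the streaming counterpart used in the test above, assuming a local mongod: GridFSBucket.open_download_stream_by_name() returns a GridOut that can be read directly instead of copying into a second stream.

import gridfs
from pymongo import MongoClient

db = MongoClient().gridfs_example              # assumes a local mongod
bucket = gridfs.GridFSBucket(db)
bucket.upload_from_stream("many_chunks", b"hello world", chunk_size_bytes=1)

grid_out = bucket.open_download_stream_by_name("many_chunks")
print(grid_out.read())                          # b'hello world'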
def _do_batched_insert(collection_name, docs, check_keys, safe,
                       last_error_args, continue_on_error, uuid_subtype,
                       client):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(
                collection_name, last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(bson._make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, uuid_subtype)
        encoded_length = len(encoded)
        too_large = (encoded_length > client.max_bson_size)

        message_length += encoded_length
        if message_length < client.max_message_size and not too_large:
            data.write(encoded)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                client._send_message(
                    _insert_message(data.getvalue(), send_safe), send_safe)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the final batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            raise DocumentTooLarge("BSON document too large (%d bytes)"
                                   " - the connected server supports"
                                   " BSON document sizes up to %d"
                                   " bytes." %
                                   (encoded_length, client.max_bson_size))

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    client._send_message(_insert_message(data.getvalue(), safe), safe)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
Example #13
def _do_batched_write_command(namespace, operation, command, docs, check_keys,
                              opts, ctx):
    """Create the next batched insert, update, or delete command.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4
    to_send = []
    idx = 0
    for doc in docs:
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Is there enough room to add this document? max_cmd_size accounts for
        # the two trailing null bytes.
        enough_data = (buf.tell() + len(key) + len(value)) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(write_op, len(value), max_bson_size)
            break
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    # Finalize the current OP_QUERY message.
    # Close list and command documents
    buf.write(_ZERO_16)

    # Write document lengths and request id
    length = buf.tell()
    buf.seek(list_start)
    buf.write(struct.pack('<i', length - list_start - 1))
    buf.seek(command_start)
    buf.write(struct.pack('<i', length - command_start))
    buf.seek(4)
    request_id = _randint()
    buf.write(struct.pack('<i', request_id))
    buf.seek(0)
    buf.write(struct.pack('<i', length))

    return request_id, buf.getvalue(), to_send
Example #14
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, uuid_subtype, client):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = client.max_bson_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes
    # XXX: This should come from the server - SERVER-10643
    max_cmd_size = max_bson_size + 16382

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b("\x00\x00\x00\x00\xd4\x07\x00\x00"))
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = random.randint(MIN_INT32, MAX_INT32)
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))

        return client._send_message((request_id, buf.getvalue()),
                                    with_last_error=True,
                                    command=True)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, uuid_subtype)
        # Send a batch?
        if (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size:
            if not idx:
                if operation == _INSERT:
                    raise InvalidDocument("BSON document too large (%d bytes)"
                                          " - the connected server supports"
                                          " BSON document sizes up to %d"
                                          " bytes." % (len(value),
                                                       max_bson_size))
                # There's nothing intelligent we can say
                # about size for update and remove
                raise InvalidDocument("command document too large")
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b('0')
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
Example #15
def _do_batched_insert(collection_name, docs, check_keys,
                       safe, last_error_args, continue_on_error, opts,
                       sock_info):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(collection_name,
                                                        last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(bson._make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, opts)
        encoded_length = len(encoded)
        too_large = (encoded_length > sock_info.max_bson_size)

        message_length += encoded_length
        if message_length < sock_info.max_message_size and not too_large:
            data.write(encoded)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                request_id, msg = _insert_message(data.getvalue(), send_safe)
                sock_info.legacy_write(request_id, msg, 0, send_safe)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the final batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            raise DocumentTooLarge("BSON document too large (%d bytes)"
                                   " - the connected server supports"
                                   " BSON document sizes up to %d"
                                   " bytes." %
                                   (encoded_length, sock_info.max_bson_size))

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    request_id, msg = _insert_message(data.getvalue(), safe)
    sock_info.legacy_write(request_id, msg, 0, safe)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
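
A self-contained sketch (not the driver's code) of the legacy OP_INSERT body that this function accumulates in `data`: an int32 flags word (bit 0 = ContinueOnError), the full collection namespace as a C string, then the BSON documents back to back. It assumes bson.encode from PyMongo's bson package (3.9+).

import struct
import bson

def build_insert_body(namespace, docs, continue_on_error=False):
    body = struct.pack("<i", int(continue_on_error))      # flags (bit 0 = ContinueOnError)
    body += namespace.encode("utf-8") + b"\x00"           # fullCollectionName as a C string
    for doc in docs:
        body += bson.encode(doc)                          # documents, concatenated
    return body

body = build_insert_body("test.coll", [{"x": 1}, {"x": 2}])
print(len(body))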
def _do_batched_insert(collection_name, docs, check_keys,
                       safe, last_error_args, continue_on_error, opts,
                       ctx):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(collection_name,
                                                        last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(_make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    to_send = []
    encode = _dict_to_bson  # Make local
    compress = ctx.compress and not (safe or send_safe)
    for doc in docs:
        encoded = encode(doc, check_keys, opts)
        encoded_length = len(encoded)
        too_large = (encoded_length > ctx.max_bson_size)

        message_length += encoded_length
        if message_length < ctx.max_message_size and not too_large:
            data.write(encoded)
            to_send.append(doc)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                if compress:
                    rid, msg = None, data.getvalue()
                else:
                    rid, msg = _insert_message(data.getvalue(), send_safe)
                ctx.legacy_bulk_insert(
                    rid, msg, 0, send_safe, to_send, compress)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the final batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            _raise_document_too_large(
                "insert", encoded_length, ctx.max_bson_size)

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)
        to_send = [doc]

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    if compress:
        request_id, msg = None, data.getvalue()
    else:
        request_id, msg = _insert_message(data.getvalue(), safe)
    ctx.legacy_bulk_insert(request_id, msg, 0, safe, to_send, compress)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
Example #17
def _do_batched_insert(collection_name, docs, check_keys, safe,
                       last_error_args, continue_on_error, opts, ctx):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(
                collection_name, last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(bson._make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    to_send = []
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, opts)
        encoded_length = len(encoded)
        too_large = (encoded_length > ctx.max_bson_size)

        message_length += encoded_length
        if message_length < ctx.max_message_size and not too_large:
            data.write(encoded)
            to_send.append(doc)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                request_id, msg = _insert_message(data.getvalue(), send_safe)
                ctx.legacy_write(request_id, msg, 0, send_safe, to_send)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the final batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            _raise_document_too_large("insert", encoded_length,
                                      ctx.max_bson_size)

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)
        to_send = [doc]

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    request_id, msg = _insert_message(data.getvalue(), safe)
    ctx.legacy_write(request_id, msg, 0, safe, to_send)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
Example #18
def _do_batched_write_command(namespace, operation, command, docs, check_keys,
                              uuid_subtype, client):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = client.max_bson_size
    max_write_batch_size = client.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes
    # XXX: This should come from the server - SERVER-10643
    max_cmd_size = max_bson_size + 16382

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b("\x00\x00\x00\x00\xd4\x07\x00\x00"))
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = random.randint(MIN_INT32, MAX_INT32)
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))

        return client._send_message((request_id, buf.getvalue()),
                                    with_last_error=True,
                                    command=True)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, uuid_subtype)
        # Send a batch?
        enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                if operation == _INSERT:
                    raise DocumentTooLarge("BSON document too large (%d bytes)"
                                           " - the connected server supports"
                                           " BSON document sizes up to %d"
                                           " bytes." %
                                           (len(value), max_bson_size))
                # There's nothing intelligent we can say
                # about size for update and remove
                raise DocumentTooLarge("command document too large")
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b('0')
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
Example #19
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, opts, ctx):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    to_send = []

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = _randint()
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))
        return ctx.write_command(request_id, buf.getvalue(), to_send)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Send a batch?
        enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(
                    write_op, len(value), max_bson_size)
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b'0'
            to_send = []
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
Example #20
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, opts, ctx):
    """Create the next batched insert, update, or delete command.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4
    to_send = []
    idx = 0
    for doc in docs:
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Is there enough room to add this document? max_cmd_size accounts for
        # the two trailing null bytes.
        enough_data = (buf.tell() + len(key) + len(value)) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(
                    write_op, len(value), max_bson_size)
            break
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    # Finalize the current OP_QUERY message.
    # Close list and command documents
    buf.write(_ZERO_16)

    # Write document lengths and request id
    length = buf.tell()
    buf.seek(list_start)
    buf.write(struct.pack('<i', length - list_start - 1))
    buf.seek(command_start)
    buf.write(struct.pack('<i', length - command_start))
    buf.seek(4)
    request_id = _randint()
    buf.write(struct.pack('<i', request_id))
    buf.seek(0)
    buf.write(struct.pack('<i', length))

    return request_id, buf.getvalue(), to_send
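
A hedged sketch of how a caller might drive this batch builder: only the documents returned in `to_send` fit in each message, so the remainder is fed back in until everything has been packed. The `send` callable is a placeholder for whatever actually writes the bytes to the server.

def send_all_batches(namespace, operation, command, docs, check_keys, opts, ctx, send):
    remaining = list(docs)
    while remaining:
        # Build one message; to_send holds the documents that fit in it.
        request_id, msg, to_send = _do_batched_write_command(
            namespace, operation, command, remaining, check_keys, opts, ctx)
        send(request_id, msg)
        remaining = remaining[len(to_send):]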
Example #21
def _do_batched_write_command(namespace, operation, command, docs, check_keys,
                              opts, ctx):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    to_send = []

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = _randint()
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))
        return ctx.write_command(request_id, buf.getvalue(), to_send)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Send a batch?
        enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(write_op, len(value), max_bson_size)
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b'0'
            to_send = []
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
Example #22
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, uuid_subtype, client):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = client.max_bson_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes
    # XXX: This should come from the server - SERVER-10643
    max_cmd_size = max_bson_size + 16382

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b("\x00\x00\x00\x00\xd4\x07\x00\x00"))
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = random.randint(MIN_INT32, MAX_INT32)
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))

        try:
            result = client._send_message((request_id, buf.getvalue()),
                                          with_last_error=True,
                                          command=True)
        except OperationFailure as exc:
            # If we were called from the bulk API we could be
            # many batches in. We have to update the indexes of
            # failed documents in the error document, using the
            # full offset including any previous batches. Do
            # that and re-raise in the caller.
            details = exc.error_document
            if not details:
                # Some error not related to write commands
                # (e.g. kerberos failure). Re-raise immediately.
                raise
            return True, details

        return not result.get('ok'), result