Example #1
def _do_batched_insert(collection_name, docs, check_keys, safe,
                       last_error_args, continue_on_error, uuid_subtype,
                       client):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(
                collection_name, last_error_args)
            final_message += error_message
        return request_id, final_message

    if not docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    last_error = None
    begin = struct.pack("<i", int(continue_on_error))
    begin += bson._make_c_string(collection_name)
    message_length = len(begin)
    data = [begin]
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, uuid_subtype)
        encoded_length = len(encoded)
        if encoded_length > client.max_bson_size:
            raise InvalidDocument("BSON document too large (%d bytes)"
                                  " - the connected server supports"
                                  " BSON document sizes up to %d"
                                  " bytes." %
                                  (encoded_length, client.max_bson_size))
        message_length += encoded_length
        if message_length < client.max_message_size:
            data.append(encoded)
            continue

        # We have enough data, send this message.
        send_safe = safe or not continue_on_error
        try:
            client._send_message(_insert_message(EMPTY.join(data), send_safe),
                                 send_safe)
        # Exception type could be OperationFailure or a subtype
        # (e.g. DuplicateKeyError)
        except OperationFailure as exc:
            # Like it says, continue on error...
            if continue_on_error:
                # Store exception details to re-raise after the final batch.
                last_error = exc
            # With unacknowledged writes just return at the first error.
            elif not safe:
                return
            # With acknowledged writes raise immediately.
            else:
                raise
        message_length = len(begin) + encoded_length
        data = [begin, encoded]

    # Send the final (possibly only) batch.
    client._send_message(_insert_message(EMPTY.join(data), safe), safe)

    # Re-raise any exception stored because of continue_on_error.
    if last_error is not None:
        raise last_error
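
The loop above is a generic size-bounded batching pattern: reject any single encoded document over the per-document ceiling, and flush the accumulated buffer whenever the next document would push the message past the per-message ceiling. A standalone Python 3 sketch of just that loop, which yields payloads instead of calling client._send_message and uses made-up size limits:

import struct


def batch_payloads(encoded_docs, max_doc_size, max_message_size, header=b""):
    """Group already-encoded documents (bytes) into message-sized payloads."""
    data = [header]
    length = len(header)
    for encoded in encoded_docs:
        if len(encoded) > max_doc_size:
            raise ValueError("document too large: %d bytes" % len(encoded))
        length += len(encoded)
        if length < max_message_size:
            data.append(encoded)
            continue
        # Flush everything accumulated so far, then start a new batch
        # containing only the header and the current document.
        yield b"".join(data)
        data = [header, encoded]
        length = len(header) + len(encoded)
    yield b"".join(data)


# Three 6-byte "documents" batched under a 16-byte message limit.
docs = [struct.pack("<iH", i, 0) for i in range(3)]
for payload in batch_payloads(docs, max_doc_size=16, max_message_size=16):
    print(len(payload), payload)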
Example #2
def _make_c_string(string, check_null=False):
    if check_null and "\x00" in string:
        raise InvalidDocument("BSON keys / regex patterns must not "
                              "contain a NULL character")
    if isinstance(string, unicode):
        return string.encode("utf-8") + "\x00"
    else:
        try:
            string.decode("utf-8")
            return string + "\x00"
        except UnicodeError:
            raise InvalidStringData("strings in documents must be valid "
                                    "UTF-8: %r" % string)
Example #3
def _dict_to_bson(dict, check_keys, top_level=True):
    try:
        elements = ""
        if top_level and "_id" in dict:
            elements += _element_to_bson("_id", dict["_id"], False)
        for (key, value) in dict.iteritems():
            if not top_level or key != "_id":
                elements += _element_to_bson(key, value, check_keys)
    except AttributeError:
        raise TypeError("encoder expected a mapping type but got: %r" % dict)

    length = len(elements) + 5
    if length > 4 * 1024 * 1024:
        raise InvalidDocument(
            "document too large - BSON documents are limited "
            "to 4 MB")
    return struct.pack("<i", length) + elements + "\x00"
Example #4
    def __check_bson_size(self, msg, max_size):
        """Make sure the message doesn't include BSON documents larger
        than the connected server will accept.

        :Parameters:
          - `msg`: message to check
          - `max_size`: largest BSON document size accepted by the
            connected server
        """
        if len(msg) == 3:
            request_id, data, max_doc_size = msg
            if max_doc_size > max_size:
                raise InvalidDocument("BSON document too large (%d bytes)"
                                      " - the connected server supports"
                                      " BSON document sizes up to %d"
                                      " bytes." % (max_doc_size, max_size))
            return (request_id, data)
        # get_more and kill_cursors messages
        # don't include BSON documents.
        return msg
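
The method only distinguishes messages that carry documents (a 3-tuple whose last item is the size of the largest encoded document) from get_more and kill_cursors messages that do not, and refuses to forward anything the server would reject. The same guard written as a standalone sketch, with illustrative names and a ValueError instead of InvalidDocument:

def check_bson_size(msg, max_size):
    """Return (request_id, data), rejecting oversized documents."""
    if len(msg) == 3:
        request_id, data, max_doc_size = msg
        if max_doc_size > max_size:
            raise ValueError("BSON document too large (%d bytes), server "
                             "limit is %d bytes" % (max_doc_size, max_size))
        return request_id, data
    return msg


assert check_bson_size((7, b"payload", 90), max_size=100) == (7, b"payload")
assert check_bson_size((8, b"get_more"), max_size=100) == (8, b"get_more")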
Example #5
def _element_to_bson(key, value, check_keys):
    if not isinstance(key, basestring):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % key)

    if check_keys:
        if key.startswith("$"):
            raise InvalidName("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidName("key %r must not contain '.'" % key)

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return "\x01" + name + struct.pack("<d", value)

    # Use Binary w/ subtype 3 for UUID instances
    try:
        import uuid

        if isinstance(value, uuid.UUID):
            value = Binary(value.bytes, subtype=3)
    except ImportError:
        pass

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            value = struct.pack("<i", len(value)) + value
        return "\x05%s%s%s%s" % (name, struct.pack(
            "<i", len(value)), chr(subtype), value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        scope = _dict_to_bson(value.scope, False, False)
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return "\x0F" + name + full_length + length + cstring + scope
    if isinstance(value, str):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, dict):
        return "\x03" + name + _dict_to_bson(value, check_keys, False)
    if isinstance(value, (list, tuple)):
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
    if isinstance(value, ObjectId):
        return "\x07" + name + value.binary
    if value is True:
        return "\x08" + name + "\x01"
    if value is False:
        return "\x08" + name + "\x00"
    if isinstance(value, (int, long)):
        # BSON supports 8-byte signed ints at most.
        if value > 2 ** 63 - 1 or value < -2 ** 63:
            raise OverflowError("MongoDB can only handle up to 8-byte ints")
        if value > 2 ** 31 - 1 or value < -2 ** 31:
            return "\x12" + name + struct.pack("<q", value)
        return "\x10" + name + struct.pack("<i", value)
    if isinstance(value, datetime.datetime):
        millis = int(
            calendar.timegm(value.timetuple()) * 1000 +
            value.microsecond / 1000)
        return "\x09" + name + struct.pack("<q", millis)
    if value is None:
        return "\x0A" + name
    if isinstance(value, _RE_TYPE):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return "\x0B" + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        return _element_to_bson(key, value.as_doc(), False)

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
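
The integer branch near the end is the choice every BSON encoder has to make: values within the signed 32-bit range become a \x10 (int32) element, larger values up to the signed 64-bit range become \x12 (int64), and anything beyond that is an overflow. A standalone Python 3 sketch of just that decision (bytes literals throughout; the helper name is illustrative):

import struct

INT32_MIN, INT32_MAX = -2 ** 31, 2 ** 31 - 1
INT64_MIN, INT64_MAX = -2 ** 63, 2 ** 63 - 1


def int_element(name, value):
    """Encode one integer element: type byte, cstring name, payload."""
    cname = name.encode("utf-8") + b"\x00"
    if not INT64_MIN <= value <= INT64_MAX:
        raise OverflowError("MongoDB can only handle up to 8-byte ints")
    if INT32_MIN <= value <= INT32_MAX:
        return b"\x10" + cname + struct.pack("<i", value)
    return b"\x12" + cname + struct.pack("<q", value)


assert int_element("n", 1) == b"\x10n\x00\x01\x00\x00\x00"
assert int_element("n", 2 ** 40) == b"\x12n\x00" + struct.pack("<q", 2 ** 40)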
Example #6
    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, uuid_subtype)
        # Send a batch?
        if (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size:
            if not idx:
                if operation == _INSERT:
                    raise InvalidDocument("BSON document too large (%d bytes)"
                                          " - the connected server supports"
                                          " BSON document sizes up to %d"
                                          " bytes." % (len(value),
                                                       max_bson_size))
                # There's nothing intelligent we can say
                # about size for update and remove
                raise InvalidDocument("command document too large")
            errors, result = send_message()
            results.append((idx_offset, result))
            if errors and ordered:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
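
The fragment works against a single growing buffer: list_start marks the position of the operation array's 4-byte length prefix, and after a batch is sent the code seeks back to list_start + 4 (just past that prefix) and truncates, so the next batch reuses the already-written command prefix. A minimal illustration of that seek-and-truncate reset using io.BytesIO, with placeholder bytes instead of the real write-command framing:

import io

buf = io.BytesIO()
buf.write(b"COMMAND-PREFIX")        # stands in for the command header
list_start = buf.tell()
buf.write(b"\x00\x00\x00\x00")      # placeholder length prefix for the array

# First batch of "operations".
buf.write(b"op1;op2;")
first_batch = buf.getvalue()

# Reset: keep everything up to and including the length prefix,
# dropping the operations that were just sent.
buf.seek(list_start + 4)
buf.truncate()
buf.write(b"op3;")
second_batch = buf.getvalue()

assert first_batch == b"COMMAND-PREFIX\x00\x00\x00\x00op1;op2;"
assert second_batch == b"COMMAND-PREFIX\x00\x00\x00\x00op3;"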