def _do_batched_insert(collection_name, docs, check_keys,
                       safe, last_error_args, continue_on_error,
                       uuid_subtype, client):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(
                collection_name, last_error_args)
            final_message += error_message
        return request_id, final_message

    if not docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    last_error = None
    begin = struct.pack("<i", int(continue_on_error))
    begin += bson._make_c_string(collection_name)
    message_length = len(begin)
    data = [begin]
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, uuid_subtype)
        encoded_length = len(encoded)
        if encoded_length > client.max_bson_size:
            raise InvalidDocument("BSON document too large (%d bytes)"
                                  " - the connected server supports"
                                  " BSON document sizes up to %d"
                                  " bytes." % (encoded_length,
                                               client.max_bson_size))
        message_length += encoded_length
        if message_length < client.max_message_size:
            data.append(encoded)
            continue

        # We have enough data, send this message.
        send_safe = safe or not continue_on_error
        try:
            client._send_message(
                _insert_message(EMPTY.join(data), send_safe), send_safe)
        # Exception type could be OperationFailure or a subtype
        # (e.g. DuplicateKeyError)
        except OperationFailure, exc:
            # Like it says, continue on error...
            if continue_on_error:
                # Store exception details to re-raise after the final batch.
                last_error = exc
            # With unacknowledged writes just return at the first error.
            elif not safe:
                return
            # With acknowledged writes raise immediately.
            else:
                raise

        # Start the next batch with the document that overflowed this one.
        message_length = len(begin) + encoded_length
        data = [begin, encoded]

    # Send the final (possibly only) batch.
    client._send_message(
        _insert_message(EMPTY.join(data), safe), safe)

    # Re-raise the error stored while continuing on error, if any.
    if last_error is not None:
        raise last_error
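
# A minimal, self-contained sketch of the batching policy above, with
# hypothetical caps standing in for client.max_bson_size and
# client.max_message_size. It only groups pre-encoded documents into
# batches; it does not build real wire-protocol messages or a GLE trailer.
def _batch_encoded_docs(encoded_docs, max_bson_size=16 * 1024 * 1024,
                        max_message_size=48 * 1000 * 1000):
    batches = []
    current, current_size = [], 0
    for encoded in encoded_docs:
        if len(encoded) > max_bson_size:
            raise ValueError("document too large: %d bytes" % len(encoded))
        # Flush once this document would push the message past the cap,
        # then start the next batch with the overflowing document.
        if current and current_size + len(encoded) >= max_message_size:
            batches.append(b"".join(current))
            current, current_size = [], 0
        current.append(encoded)
        current_size += len(encoded)
    if current:
        batches.append(b"".join(current))
    return batches

# e.g. three 6-byte documents against a 16-byte message cap -> 2 batches
assert len(_batch_encoded_docs([b"x" * 6] * 3, max_message_size=16)) == 2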
def _make_c_string(string, check_null=False):
    if check_null and "\x00" in string:
        raise InvalidDocument("BSON keys / regex patterns must not "
                              "contain a NULL character")
    if isinstance(string, unicode):
        return string.encode("utf-8") + "\x00"
    else:
        try:
            string.decode("utf-8")
            return string + "\x00"
        # Only trap decoding failures; a bare except would also swallow
        # KeyboardInterrupt and unrelated programming errors.
        except UnicodeError:
            raise InvalidStringData("strings in documents must be valid "
                                    "UTF-8: %r" % string)
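
# A quick usage sketch (Python 2 semantics assumed, matching the code
# above): BSON C strings are UTF-8 bytes terminated by a NUL, which is
# why keys and regex patterns may not themselves contain "\x00".
assert _make_c_string("hello") == "hello\x00"
assert _make_c_string(u"caf\xe9") == "caf\xc3\xa9\x00"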
def _dict_to_bson(dict, check_keys, top_level=True):
    try:
        elements = ""
        if top_level and "_id" in dict:
            elements += _element_to_bson("_id", dict["_id"], False)
        for (key, value) in dict.iteritems():
            if not top_level or key != "_id":
                elements += _element_to_bson(key, value, check_keys)
    except AttributeError:
        raise TypeError("encoder expected a mapping type but got: %r" % dict)

    length = len(elements) + 5
    if length > 4 * 1024 * 1024:
        raise InvalidDocument("document too large - BSON documents are "
                              "limited to 4 MB")
    return struct.pack("<i", length) + elements + "\x00"
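
# A small sanity-check sketch of the framing _dict_to_bson emits: a
# little-endian int32 total length, the element bytes, then a trailing
# NUL. The empty document is therefore exactly five bytes.
import struct

empty = _dict_to_bson({}, False)
assert empty == "\x05\x00\x00\x00\x00"
assert struct.unpack("<i", empty[:4])[0] == len(empty)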
def __check_bson_size(self, msg, max_size):
    """Make sure the message doesn't include BSON documents larger
    than the connected server will accept.

    :Parameters:
      - `msg`: message to check
    """
    if len(msg) == 3:
        request_id, data, max_doc_size = msg
        if max_doc_size > max_size:
            raise InvalidDocument("BSON document too large (%d bytes)"
                                  " - the connected server supports"
                                  " BSON document sizes up to %d"
                                  " bytes." % (max_doc_size, max_size))
        return (request_id, data)
    # get_more and kill_cursors messages
    # don't include BSON documents.
    return msg
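
# A hedged illustration of the convention this helper assumes: write
# messages arrive as (request_id, data, max_doc_size) triples, while
# get_more/kill_cursors arrive as pairs and pass through untouched.
# `self` is unused in this excerpt, so None suffices for a standalone
# check; the sizes below are made up for illustration.
msg = (1234, "...", 32 * 1024 * 1024)
try:
    __check_bson_size(None, msg, 16 * 1024 * 1024)
except InvalidDocument:
    pass  # a 32 MB document exceeds the 16 MB server limit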
def _element_to_bson(key, value, check_keys):
    if not isinstance(key, basestring):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % key)

    if check_keys:
        if key.startswith("$"):
            raise InvalidName("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidName("key %r must not contain '.'" % key)

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return "\x01" + name + struct.pack("<d", value)

    # Use Binary w/ subtype 3 for UUID instances
    try:
        import uuid

        if isinstance(value, uuid.UUID):
            value = Binary(value.bytes, subtype=3)
    except ImportError:
        pass

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            value = struct.pack("<i", len(value)) + value
        return "\x05%s%s%s%s" % (name, struct.pack("<i", len(value)),
                                 chr(subtype), value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        scope = _dict_to_bson(value.scope, False, False)
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return "\x0F" + name + full_length + length + cstring + scope
    if isinstance(value, str):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, dict):
        return "\x03" + name + _dict_to_bson(value, check_keys, False)
    if isinstance(value, (list, tuple)):
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
    if isinstance(value, ObjectId):
        return "\x07" + name + value.binary
    if value is True:
        return "\x08" + name + "\x01"
    if value is False:
        return "\x08" + name + "\x00"
    if isinstance(value, (int, long)):
        # TODO this is a really ugly way to check for this...
        if value > 2 ** 64 / 2 - 1 or value < -2 ** 64 / 2:
            raise OverflowError("MongoDB can only handle up to 8-byte ints")
        if value > 2 ** 32 / 2 - 1 or value < -2 ** 32 / 2:
            return "\x12" + name + struct.pack("<q", value)
        return "\x10" + name + struct.pack("<i", value)
    if isinstance(value, datetime.datetime):
        millis = int(calendar.timegm(value.timetuple()) * 1000 +
                     value.microsecond / 1000)
        return "\x09" + name + struct.pack("<q", millis)
    if value is None:
        return "\x0A" + name
    if isinstance(value, _RE_TYPE):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return "\x0B" + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        return _element_to_bson(key, value.as_doc(), False)

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
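
# A worked example of the element layout produced above (a sketch,
# assuming the Python 2 code as written): an int32 element is the type
# byte \x10, the key as a NUL-terminated C string, then the value as a
# little-endian int32.
import struct

element = _element_to_bson("x", 1, False)
assert element == "\x10" + "x\x00" + struct.pack("<i", 1)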
idx = 0
idx_offset = 0
has_docs = False
for doc in docs:
    has_docs = True
    # Encode the current operation
    key = b(str(idx))
    value = bson.BSON.encode(doc, check_keys, uuid_subtype)
    # Send a batch?
    if (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size:
        if not idx:
            if operation == _INSERT:
                raise InvalidDocument("BSON document too large (%d bytes)"
                                      " - the connected server supports"
                                      " BSON document sizes up to %d"
                                      " bytes." % (len(value),
                                                   max_bson_size))
            # There's nothing intelligent we can say
            # about size for update and remove
            raise InvalidDocument("command document too large")
        errors, result = send_message()
        results.append((idx_offset, result))
        if errors and ordered:
            return results

        # Truncate back to the start of list elements
        buf.seek(list_start + 4)
        buf.truncate()
        idx_offset += idx
        idx = 0
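
# A self-contained sketch of the buffer-rewind trick used above: after
# flushing a batch, seek back to where the BSON array's elements begin
# and truncate, so the next batch reuses the already-written command
# prefix. The names (buf, list_start) mirror the excerpt, but the
# prefix and element content here are made up for illustration.
from io import BytesIO

buf = BytesIO()
buf.write(b"<command prefix>")   # stand-in for the write command header
list_start = buf.tell()
buf.write(b"\x00\x00\x00\x00")   # placeholder for the array's int32 length
buf.write(b"<first batch of elements>")

# Flush, then rewind to just past the length placeholder for batch two.
buf.seek(list_start + 4)
buf.truncate()
buf.write(b"<second batch of elements>")
assert buf.getvalue().startswith(b"<command prefix>")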