def _op_msg_no_header(flags, command, identifier, docs, opts):
    """Build an OP_MSG message body (no message header).

    The command document is always emitted as a type 0 payload.  When
    ``identifier`` is truthy, every document in ``docs`` is additionally
    emitted inside one type 1 payload named by ``identifier``.

    Note: no batch splitting happens here, so callers can only compare the
    returned sizes against their limits *after* the whole message is built.

    Returns a tuple ``(message_bytes, total_size, max_doc_size)``.
    """
    # Payload 0: the command document, encoded without checking keys.
    command_bytes = _dict_to_bson(command, False, opts)
    parts = [_pack_op_msg_flags_type(flags, 0), command_bytes]
    total_size = len(command_bytes)
    max_doc_size = 0

    if identifier:
        section_marker = _pack_byte(1)
        name = _make_c_string(identifier)
        doc_blobs = [_dict_to_bson(doc, False, opts) for doc in docs]
        doc_lengths = [len(blob) for blob in doc_blobs]
        # The extra 4 bytes presumably account for the size field itself.
        section_size = len(name) + sum(doc_lengths) + 4
        packed_size = _pack_int(section_size)
        total_size += section_size
        max_doc_size = max(doc_lengths)
        parts.extend([section_marker, packed_size, name])
        parts.extend(doc_blobs)

    return b"".join(parts), total_size, max_doc_size
def test_incorrect_sub_dtype4(self):
    """Sub document not a document: a scalar or an array is rejected."""
    expected_message = ("invalid document: expected subdoc from dtype,"
                        " got other type")
    # "q" is given first a scalar and then an array.
    for bad_q in (10, [10, 11, 12]):
        bad_doc = bson.SON([("x", bson.SON([("y", 0), ("z", 0)])),
                            ("q", bad_q)])
        bad_raw_docs = self.raw_docs[:9]
        bad_raw_docs.append(
            bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS))
        with self.assertRaisesPattern(bsonnumpy.error, expected_message):
            bsonnumpy.sequence_to_ndarray(bad_raw_docs, self.dtype_sub, 10)
def _batched_op_msg_impl(operation, command, docs, ack, opts, ctx, buf):
    """Create a batched OP_MSG write.

    Writes the flags, a type 0 section holding ``command`` and a type 1
    section holding as many of ``docs`` as fit into ``buf``.

    :Parameters:
      - `operation`: Key into ``_OP_MSG_MAP`` selecting the bytes written
        at the start of the type 1 section; also used as a positional
        index into ``_FIELD_MAP`` when reporting an oversized document.
      - `command`: The command document (encoded without key checking).
      - `docs`: Iterable of documents to batch.
      - `ack`: Truthy for acknowledged writes; selects the flag bytes and
        disables the client-side per-document size check.
      - `opts`: Codec options passed to ``_dict_to_bson``.
      - `ctx`: Supplies ``max_bson_size``, ``max_write_batch_size`` and
        ``max_message_size``.
      - `buf`: Seekable, writable binary buffer that receives the message.

    :Returns:
      A tuple ``(to_send, length)``: the documents written into this batch
      and the buffer position at the end of the message.  The buffer's
      position is left just after the back-patched size field, not at the
      end of the message.

    Raises :class:`InvalidOperation` when ``operation`` is not in
    ``_OP_MSG_MAP``.  Oversized documents are reported through
    ``_raise_document_too_large``.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    max_message_size = ctx.max_message_size

    flags = b"\x00\x00\x00\x00" if ack else b"\x02\x00\x00\x00"
    # Flags
    buf.write(flags)

    # Type 0 Section
    buf.write(b"\x00")
    buf.write(_dict_to_bson(command, False, opts))

    # Type 1 Section
    buf.write(b"\x01")
    size_location = buf.tell()
    # Save space for size; it is filled in once the batch is complete.
    buf.write(b"\x00\x00\x00\x00")
    try:
        buf.write(_OP_MSG_MAP[operation])
    except KeyError:
        raise InvalidOperation("Unknown command")

    to_send = []
    idx = 0
    for doc in docs:
        # Encode the current operation
        value = _dict_to_bson(doc, False, opts)
        doc_length = len(value)
        new_message_size = buf.tell() + doc_length
        # Does first document exceed max_message_size?
        doc_too_large = idx == 0 and (new_message_size > max_message_size)
        # When OP_MSG is used unacknowledged we have to check
        # document size client side or applications won't be notified.
        # Otherwise we let the server deal with documents that are too large
        # since ordered=False causes those documents to be skipped instead of
        # halting the bulk write operation.
        unacked_doc_too_large = not ack and (doc_length > max_bson_size)
        if doc_too_large or unacked_doc_too_large:
            write_op = list(_FIELD_MAP.keys())[operation]
            _raise_document_too_large(write_op, len(value), max_bson_size)
        # We have enough data, return this batch.
        if new_message_size > max_message_size:
            break
        buf.write(value)
        to_send.append(doc)
        idx += 1
        # We have enough documents, return this batch.
        if idx == max_write_batch_size:
            break

    # Write type 1 section size: seek back and overwrite the placeholder.
    length = buf.tell()
    buf.seek(size_location)
    buf.write(_pack_int(length - size_location))

    return to_send, length
def _query_impl(options, collection_name, num_to_skip, num_to_return, query,
                field_selector, opts, check_keys):
    """Build an OP_QUERY message body.

    Returns ``(message_bytes, max_bson_size)`` where ``max_bson_size`` is
    the larger of the encoded query and the encoded field selector (the
    selector counts as zero bytes when ``field_selector`` is falsy).
    """
    query_bytes = _dict_to_bson(query, check_keys, opts)
    selector_bytes = b""
    if field_selector:
        # The field selector is always encoded without key checking.
        selector_bytes = _dict_to_bson(field_selector, False, opts)

    largest_doc = max(len(query_bytes), len(selector_bytes))
    parts = [
        _pack_int(options),
        _make_c_string(collection_name),
        _pack_int(num_to_skip),
        _pack_int(num_to_return),
        query_bytes,
        selector_bytes,
    ]
    return b"".join(parts), largest_doc
def test_array_scalar_load2(self):
    """Sub arrays with documents as elements raise an unsupported-type error."""

    def signed_pair(first, second, n):
        # Two documents with the same keys: one with n, one with -n.
        return [bson.SON([(first, n), (second, n)]),
                bson.SON([(first, -n), (second, -n)])]

    son_docs = [
        bson.SON([('x', [signed_pair('a', 'b', i),
                         signed_pair('c', 'd', i)])])
        for i in range(2, 4)
    ]
    raw_docs = []
    for son_doc in son_docs:
        raw_docs.append(bson._dict_to_bson(
            son_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    inner_dtype = np.dtype(([('a', 'int32'), ('b', 'int32')], 2))
    middle_dtype = np.dtype((inner_dtype, 2))
    dtype = np.dtype([('x', middle_dtype)])

    # Constructed as in the original test; the assertion below does not
    # compare against it.
    ndarray = np.array(
        [[([(i, i), (-i, -i)],), ([(i, i), (-i, -i)],)]
         for i in range(2, 4)],
        dtype)

    with self.assertRaisesPattern(bsonnumpy.error,
                                  r'unsupported BSON type: unknown'):
        bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 2)
def test_null(self):
    """Loading a BSON Null value raises an unsupported-type error."""
    encoded = bson._dict_to_bson(
        {"x": None}, True, bson.DEFAULT_CODEC_OPTIONS)
    expected_message = r'unsupported BSON type: Null'
    with self.assertRaisesPattern(bsonnumpy.error, expected_message):
        bsonnumpy.sequence_to_ndarray(
            iter([encoded]), np.dtype([('x', '<V10')]), 1)
def test_array_scalar_load4(self):
    """Documents with multiple levels of sub documents are rejected."""

    def signed_pair(first, second, n):
        # Two documents with the same keys: one with n, one with -n.
        return [bson.SON([(first, n), (second, n)]),
                bson.SON([(first, -n), (second, -n)])]

    son_docs = [
        bson.SON([('x', [signed_pair('a', 'b', i),
                         signed_pair('c', 'd', i)])])
        for i in range(10)
    ]
    raw_docs = []
    for son_doc in son_docs:
        raw_docs.append(bson._dict_to_bson(
            son_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    # Build the dtype from the innermost level outwards.
    leaf_dtype = np.dtype([('q', 'int32')])
    pair_dtype = np.dtype(([('a', leaf_dtype), ('b', leaf_dtype)], 2))
    array_dtype = np.dtype((pair_dtype, 2))
    dtype = np.dtype([('x', array_dtype)])

    with self.assertRaisesPattern(bsonnumpy.error,
                                  r'unsupported BSON type: unknown'):
        bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 4)
def encode(self):
    """Return this message as bytes: encoded header followed by the body.

    The message itself is BSON-encoded (without key checking) to form the
    body.  Side effect: ``self.hdr.length`` is set to the body length
    before the header is encoded.
    """
    body = _dict_to_bson(self, False)
    self.hdr.length = len(body)
    header = self.hdr.encode()
    return header + body
def encrypt(self, database, cmd, check_keys, codec_options):
    """Encrypt a MongoDB command.

    :Parameters:
      - `database`: The database for this command.
      - `cmd`: A command document.  When `check_keys` is true, any
        ``$clusterTime`` entry is removed from `cmd` in place.
      - `check_keys`: If True, check `cmd` for invalid keys.
      - `codec_options`: The CodecOptions to use while encoding `cmd`.

    :Returns:
      The encrypted command to execute.

    An encoded command larger than ``_MAX_ENC_BSON_SIZE +
    _COMMAND_OVERHEAD`` is reported through ``_raise_document_too_large``.
    """
    self._check_closed()

    # Workaround for $clusterTime which is incompatible with
    # check_keys: take it out before encoding, re-attach it afterwards.
    cluster_time = None
    if check_keys:
        cluster_time = cmd.pop('$clusterTime', None)

    encoded_cmd = _dict_to_bson(cmd, check_keys, codec_options)
    max_cmd_size = _MAX_ENC_BSON_SIZE + _COMMAND_OVERHEAD
    if len(encoded_cmd) > max_cmd_size:
        # The first key of the command is used as the operation name.
        raise _raise_document_too_large(
            next(iter(cmd)), len(encoded_cmd), max_cmd_size)

    with _wrap_encryption_errors():
        encrypted_cmd = self._auto_encrypter.encrypt(database, encoded_cmd)
        # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
        encrypt_cmd = _inflate_bson(
            encrypted_cmd, DEFAULT_RAW_BSON_OPTIONS)
        if cluster_time:
            encrypt_cmd['$clusterTime'] = cluster_time
        return encrypt_cmd
def encrypt(self, database, cmd, check_keys, codec_options):
    """Encrypt a MongoDB command.

    :Parameters:
      - `database`: The database for this command.
      - `cmd`: A command document.  When `check_keys` is true, any
        ``$clusterTime`` entry is removed from `cmd` in place.
      - `check_keys`: If True, check `cmd` for invalid keys.
      - `codec_options`: The CodecOptions to use while encoding `cmd`.

    :Returns:
      The encrypted command to execute.
    """
    self._check_closed()

    # $clusterTime is incompatible with check_keys, so it is taken out
    # before encoding and re-attached to the encrypted result.
    cluster_time = None
    if check_keys:
        cluster_time = cmd.pop('$clusterTime', None)

    encoded_cmd = _dict_to_bson(cmd, check_keys, codec_options)
    with _wrap_encryption_errors():
        encrypted_cmd = self._auto_encrypter.encrypt(database, encoded_cmd)
        # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
        encrypt_cmd = _inflate_bson(encrypted_cmd, DEFAULT_RAW_BSON_OPTIONS)
        if cluster_time:
            encrypt_cmd['$clusterTime'] = cluster_time
        return encrypt_cmd
def test_deeply_nested_array(self):
    """Arrays of length 1 are maintained when nested in another array."""
    # Each case: (dtype field specs, document items, expected row tuple).
    cases = [
        (
            [("a", "(3,2,1)int32"), ("b", "(3,2,1)int32")],
            [("a", [[[9], [9]], [[8], [8]], [[7], [7]]]),
             ("b", [[[6], [6]], [[5], [5]], [[4], [4]]])],
            ([[[9], [9]], [[8], [8]], [[7], [7]]],
             [[[6], [6]], [[5], [5]], [[4], [4]]]),
        ),
        (
            [("a", "(3,1)int32"), ("b", "(3,1)int32"), ("c", "(3,1)int32")],
            [("a", [[9], [8], [7]]),
             ("b", [[6], [5], [4]]),
             ("c", [[3], [2], [1]])],
            ([[9], [8], [7]], [[6], [5], [4]], [[3], [2], [1]]),
        ),
        (
            [("a", "2int32")],
            [("a", [7, 7])],
            ([7, 7], ),
        ),
        (
            [("a", "(2,1,1,1)int32")],
            [("a", [[[[99]]], [[[88]]]])],
            ([[[[99]]], [[[88]]]], ),
        ),
    ]

    for field_specs, items, expected_row in cases:
        dtype = np.dtype(field_specs)
        encoded = bson._dict_to_bson(
            bson.SON(items), False, bson.DEFAULT_CODEC_OPTIONS)
        result = bsonnumpy.sequence_to_ndarray([encoded], dtype, 1)
        self.assertEqual(dtype, result.dtype)
        expected = np.array([expected_row], dtype)
        self.assertTrue(np.array_equal(result, expected))
def _write(self, obj):
    """Write ``obj`` to ``self.fh`` as a BSON document.

    A dict is validated with ``self._validate_write`` and written directly.
    Any other value is wrapped as ``{'value': obj}`` and written through
    this same method.
    """
    if not isinstance(obj, dict):
        # Bare (non-dict) output value: wrap it in a 'value' field.
        self._write({'value': obj})
        return

    self._validate_write(obj)
    encoded = _dict_to_bson(obj, False)
    self.fh.write(encoded)
def _delete(collection_name, spec, opts, flags):
    """Build an OP_DELETE message body.

    Returns ``(message_bytes, encoded_spec_length)``.
    """
    # Uses extensions.
    spec_bytes = _dict_to_bson(spec, False, opts)
    message = b"".join((
        _ZERO_32,
        _make_c_string(collection_name),
        _pack_int(flags),
        spec_bytes,
    ))
    return message, len(spec_bytes)
def encode(self):
    """Return the encoded header followed by the body bytes.

    When ``self._body`` is not None it is BSON-encoded (without key
    checking) and cached in ``self._bodybytes``; otherwise the existing
    ``self._bodybytes`` is reused.  ``self.hdr.length`` is updated to the
    body length before the header is encoded.
    """
    body = self._body
    if body is not None:
        self._bodybytes = _dict_to_bson(body, False)
    self.hdr.length = len(self._bodybytes)
    header_bytes = self.hdr.encode()
    return header_bytes + self._bodybytes
def compare_seq_to_ndarray_result(self, np_type, document):
    """Load ``document`` into a one-row ndarray and compare every field.

    Asserts that the result has the requested dtype and that each key of
    ``document`` holds an equal value in row 0.
    """
    encoded = bson._dict_to_bson(
        document, False, bson.DEFAULT_CODEC_OPTIONS)
    dtype = np.dtype(np_type)
    result = bsonnumpy.sequence_to_ndarray([encoded], dtype, 1)
    self.assertEqual(result.dtype, dtype)

    row = result[0]
    for key in document:
        actual = row[key]
        expected = document[key]
        failure = "Comparison failed for type %s: %s != %s" % (
            dtype, actual, expected)
        self.assertEqual(actual, expected, failure)
def _test_error(self, value, bson_type_name, codes):
    """Assert ``value`` cannot be loaded into any dtype code in ``codes``.

    Each attempt must raise ``bsonnumpy.error`` with a message matching
    "cannot convert <bson_type_name> to dtype".
    """
    encoded = bson._dict_to_bson(
        {'x': value}, True, bson.DEFAULT_CODEC_OPTIONS)
    for code in codes:
        field_dtype = np.dtype([('x', code)])
        message = "cannot convert %s to dtype" % bson_type_name
        with self.assertRaisesPattern(bsonnumpy.error, message):
            bsonnumpy.sequence_to_ndarray(iter([encoded]), field_dtype, 1)
def test(self):
    """Loading ``value`` into ``dtype`` reports an unsupported BSON type.

    ``value``, ``dtype`` and ``type_name`` come from the enclosing scope.
    """
    check_keys = True
    encoded = bson._dict_to_bson(
        {"a": value}, check_keys, bson.DEFAULT_CODEC_OPTIONS)

    with self.assertRaises(bsonnumpy.error) as context:
        bsonnumpy.sequence_to_ndarray(
            [encoded], np.dtype([("a", dtype)]), 1)

    expected_fragment = "unsupported BSON type: %s" % type_name
    self.assertIn(expected_fragment, str(context.exception))
def test_incorrect_sub_dtype_array7(self):
    """Sub array too short: an incorrect-length error is raised."""
    short_x = [['d' * 1, 'd' * 2], ['d' * 4, 'd' * 5]]
    short_y = [['d' * 7, 'd' * 8], ['d' * 10, 'd' * 11]]
    bad_doc = bson.SON([("x", short_x), ("y", short_y)])

    # Three docs from the fixture followed by the malformed one.
    docs = self.raw_docs[:3]
    docs.append(
        bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    expected_message = "invalid document: array is of incorrect length"
    with self.assertRaisesPattern(bsonnumpy.error, expected_message):
        bsonnumpy.sequence_to_ndarray(docs, self.dtype, 4)
def test_incorrect_sub_dtype_array1(self):
    """Top document missing key: the document does not match the dtype."""
    x_values = [['d' * 1, 'd' * 2, 'd' * 3],
                ['d' * 4, 'd' * 5, 'd' * 6]]
    other_values = [['d' * 7, 'd' * 7, 'd' * 9],
                    ['d' * 10, 'd' * 11, 'd' * 12]]
    bad_doc = bson.SON([("x", x_values), ("bad_key", other_values)])

    # Three docs from the fixture followed by the malformed one.
    docs = self.raw_docs[:3]
    docs.append(
        bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    expected_message = "document does not match dtype"
    with self.assertRaisesPattern(bsonnumpy.error, expected_message):
        bsonnumpy.sequence_to_ndarray(docs, self.dtype, 4)
def test_incorrect_sub_dtype3(self):
    """Sub document missing key: the document does not match the dtype."""
    broken_sub = bson.SON([("bad", 0), ("z", 0)])
    intact_sub = bson.SON([("y", 0), ("z", 0)])
    bad_doc = bson.SON([("x", broken_sub), ("q", intact_sub)])

    # Nine docs from the fixture followed by the malformed one.
    docs = self.raw_docs[:9]
    docs.append(
        bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    expected_message = "document does not match dtype"
    with self.assertRaisesPattern(bsonnumpy.error, expected_message):
        bsonnumpy.sequence_to_ndarray(docs, self.dtype_sub, 10)
def test_incorrect_sub_dtype_array2(self):
    """Top-level array not array: a string where a list is expected."""
    x_values = [['d' * 1, 'd' * 2, 'd' * 3],
                ['d' * 4, 'd' * 5, 'd' * 6]]
    bad_doc = bson.SON([("x", x_values), ("y", 'not an array')])

    # Three docs from the fixture followed by the malformed one.
    docs = self.raw_docs[:3]
    docs.append(
        bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    expected_message = (
        "invalid document: expected list from dtype, got other type")
    with self.assertRaisesPattern(bsonnumpy.error, expected_message):
        bsonnumpy.sequence_to_ndarray(docs, self.dtype, 4)
def encode_dict_list(products, output_bson_path, total=None, overwrite=False):
    """Write every dict in ``products`` to a file as back-to-back BSON docs.

    Args:
        products: Iterable of dicts to encode.  May be an unsized iterable
            such as a generator.
        output_bson_path: Destination file, opened in binary write mode.
        total: Number of products, used for logging and the progress bar.
            When falsy, ``len(products)`` is used if available; otherwise
            the progress bar runs without a known total.
        overwrite: When exactly ``False`` and the output file already
            exists, nothing is written.

    Returns:
        None.
    """
    if os.path.exists(output_bson_path) and overwrite is False:
        logging.info('already exists ({})'.format(output_bson_path))
        return

    # Resolve the count once, before opening the file, so the log line and
    # the progress bar agree and an unsized iterable cannot fail after the
    # output has already been truncated.
    if not total:
        try:
            total = len(products)
        except TypeError:
            total = None

    logging.info('write {} products to {}'.format(total, output_bson_path))
    with open(output_bson_path, 'wb') as writer:
        for prod in tqdm(products, unit='products', total=total, ascii=True):
            writer.write(
                bson._dict_to_bson(prod, False, bson.DEFAULT_CODEC_OPTIONS))
def __init__(self, client, opts):
    """Create a _Encrypter for a client.

    :Parameters:
      - `client`: The encrypted MongoClient.
      - `opts`: The encrypted client's :class:`AutoEncryptionOpts`.
    """
    schema_map = opts._schema_map
    if schema_map is not None:
        schema_map = _dict_to_bson(schema_map, False, _DATA_KEY_OPTS)

    self._bypass_auto_encryption = opts._bypass_auto_encryption
    self._internal_client = None

    def _get_internal_client(encrypter, mongo_client):
        # Unlimited pool size: the same client can be used.
        if mongo_client.max_pool_size is None:
            return mongo_client
        # Limited pool size: create the internal client on first use and
        # cache it on the encrypter.
        if encrypter._internal_client is None:
            encrypter._internal_client = mongo_client._duplicate(
                minPoolSize=0, auto_encryption_opts=None)
        return encrypter._internal_client

    key_vault_client = opts._key_vault_client
    if key_vault_client is None:
        key_vault_client = _get_internal_client(self, client)

    # No metadata client is needed when auto encryption is bypassed.
    metadata_client = None
    if not opts._bypass_auto_encryption:
        metadata_client = _get_internal_client(self, client)

    db, coll = opts._key_vault_namespace.split('.', 1)
    key_vault_coll = key_vault_client[db][coll]

    mongocryptd_client = MongoClient(
        opts._mongocryptd_uri, connect=False,
        serverSelectionTimeoutMS=_MONGOCRYPTD_TIMEOUT_MS)

    io_callbacks = _EncryptionIO(
        metadata_client, key_vault_coll, mongocryptd_client, opts)
    crypt_options = MongoCryptOptions(opts._kms_providers, schema_map)
    self._auto_encrypter = AutoEncrypter(io_callbacks, crypt_options)
    # Set last, after every collaborator has been created.
    self._closed = False
def test_array_scalar_load00(self):
    """A four-element integer array field loads into a '4int32' dtype."""
    son_docs = [bson.SON([('x', [i] * 4)]) for i in range(2, 4)]
    raw_docs = []
    for son_doc in son_docs:
        raw_docs.append(bson._dict_to_bson(
            son_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    dtype = np.dtype([('x', '4int32')])
    expected = np.array([([i] * 4,) for i in range(2, 4)], dtype)

    # Correct dtype
    loaded = bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 2)
    self.assertTrue(np.array_equal(expected, loaded))
def test_incorrect_sub_dtype2(self):
    """Top document has extra key: the extra key is left out of the result.

    Only the field named in the dtype ("y") is loaded; looking up the
    other key ("x") on the resulting row raises ValueError.
    """
    data = bson._dict_to_bson(
        {"x": 12, "y": 13}, True, bson.DEFAULT_CODEC_OPTIONS)

    # The builtin ``int`` replaces ``np.int``, which was only an alias of
    # it and was removed in NumPy 1.24.
    ndarray = bsonnumpy.sequence_to_ndarray(
        [data], np.dtype([("y", int)]), 1)

    self.assertEqual(1, len(ndarray))
    self.assertEqual(13, ndarray[0]["y"])
    with self.assertRaises(ValueError):
        ndarray[0]["x"]
def __init__(self, io_callbacks, opts):
    """Encrypts and decrypts MongoDB commands.

    This class is used to support automatic encryption and decryption of
    MongoDB commands.

    :Parameters:
      - `io_callbacks`: A :class:`MongoCryptCallback`.
      - `opts`: The encrypted client's :class:`AutoEncryptionOpts`.
    """
    raw_schema_map = opts._schema_map
    encoded_schema_map = None
    if raw_schema_map is not None:
        # The schema map is handed to MongoCryptOptions as BSON bytes.
        encoded_schema_map = _dict_to_bson(
            raw_schema_map, False, _DATA_KEY_OPTS)

    crypt_options = MongoCryptOptions(
        opts._kms_providers, encoded_schema_map)
    self._auto_encrypter = AutoEncrypter(io_callbacks, crypt_options)
    self._bypass_auto_encryption = opts._bypass_auto_encryption
def collection_info(self, database, filter):
    """Get the collection info for a namespace.

    The returned collection info is passed to libmongocrypt which reads
    the JSON schema.

    :Parameters:
      - `database`: The database on which to run listCollections.
      - `filter`: The filter to pass to listCollections.

    :Returns:
      The first document from the listCollections command response as
      BSON, or None when the cursor yields no documents.
    """
    db = self.client_ref()[database]
    with db.list_collections(filter=RawBSONDocument(filter)) as cursor:
        # Only the first document is of interest.
        for first_doc in cursor:
            return _dict_to_bson(first_doc, False, _DATA_KEY_OPTS)
    return None
def encrypt(self, database, cmd, codec_options):
    """Encrypt a MongoDB command.

    :Parameters:
      - `database`: The database for this command.
      - `cmd`: A command document.
      - `codec_options`: The CodecOptions to use while encoding `cmd`.

    :Returns:
      The encrypted command to execute.
    """
    self._check_closed()
    # The command is encoded without key checking.
    raw_cmd = _dict_to_bson(cmd, False, codec_options)
    with _wrap_encryption_errors():
        encrypted_bytes = self._auto_encrypter.encrypt(database, raw_cmd)
        # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
        return _inflate_bson(encrypted_bytes, DEFAULT_RAW_BSON_OPTIONS)
def test_string_length(self):
    """A string is truncated or NUL-padded to the void field's width."""
    data = bson._dict_to_bson(
        {"x": "abc"}, True, bson.DEFAULT_CODEC_OPTIONS)

    # (dtype code, expected raw bytes of field "x")
    expectations = [
        ("V1", b"a"),
        ("V2", b"ab"),
        ("V3", b"abc"),
        ("V4", b"abc\0"),
    ]
    for code, expected_bytes in expectations:
        ndarray = bsonnumpy.sequence_to_ndarray(
            iter([data]), np.dtype([("x", code)]), 1)
        self.assertEqual(ndarray[0]["x"].tobytes(), expected_bytes)
def test_array_scalar_load3(self):
    """Sub arrays with documents that have arrays are rejected."""

    def doc_with_arrays(n):
        # One document whose 'a' and 'b' fields are four-element arrays.
        return bson.SON([('a', [n] * 4), ('b', [n] * 4)])

    son_docs = [
        bson.SON([('x', [doc_with_arrays(i), doc_with_arrays(-i)])])
        for i in range(10)
    ]
    raw_docs = []
    for son_doc in son_docs:
        raw_docs.append(bson._dict_to_bson(
            son_doc, False, bson.DEFAULT_CODEC_OPTIONS))

    element_dtype = np.dtype(([('a', '4int32'), ('b', '4int32')], 2))
    dtype = np.dtype([('x', element_dtype)])

    with self.assertRaisesPattern(bsonnumpy.error,
                                  r'unsupported BSON type: Sub-document'):
        bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 4)
def _element_to_bson(key, value, check_keys, uuid_subtype):
    """Encode one key/value pair as a BSON element, honoring extensions.

    A pickler registered with ``Extension`` for the value's class is
    applied first.  If none is registered and the value has
    ``__getstate__``, the classes in its MRO are tried in order.  When a
    pickler returns a dict, a copy tagged with ``'__customtype__'`` is
    encoded instead.

    Lists and tuples are encoded here as BSON arrays, with string indices
    as keys.  Every other value is delegated to ``_bson_element_to_bson``.
    """
    # The original guarded this with ``isinstance(type(value), object)``,
    # which is true for every object, so the assignment always happened.
    objtype = value.__class__
    encoder = Extension.getpickler(objtype)
    if not encoder and hasattr(value, '__getstate__'):
        # Fall back to the first class in the MRO that has a pickler.
        for _type in type(value).__mro__:
            encoder = Extension.getpickler(_type)
            if encoder:
                break

    if encoder:
        code, value = encoder(value)
        if isinstance(value, dict):
            assert '__customtype__' not in value
            # Copy so the pickler's returned dict is not mutated.
            value = value.copy()
            value['__customtype__'] = code

    if isinstance(value, (list, tuple)):
        name = bson._make_c_string(key)
        as_dict = OrderedDict((str(k), v) for (k, v) in enumerate(value))
        return bson.BSONARR + name + bson._dict_to_bson(
            as_dict, check_keys, uuid_subtype, False)

    return _bson_element_to_bson(key, value, check_keys, uuid_subtype)
def test__dict_to_bson(self):
    """``_dict_to_bson`` produces the same bytes as ``BSON.encode``."""
    document = {'average': Decimal('56.47')}
    via_public_api = BSON.encode(document, codec_options=self.codecopts)
    via_private_api = _dict_to_bson(document, False, self.codecopts)
    self.assertEqual(via_private_api, via_public_api)
class TestNdarrayFlat(TestToNdarray):
    """Tests for loading flat (non-nested) BSON documents into ndarrays."""

    dtype = np.dtype([('x', np.int32), ('y', np.int32)])
    bson_docs = [
        bson._dict_to_bson(bson.SON([("x", i), ("y", -i)]), False, DEFAULT)
        for i in range(10)
    ]
    ndarray = np.array([(i, -i) for i in range(10)], dtype=dtype)

    # assertRaisesRegexp is the older name of assertRaisesRegex; use
    # whichever this unittest version provides.
    if hasattr(unittest.TestCase, 'assertRaisesRegex'):
        assertRaisesPattern = unittest.TestCase.assertRaisesRegex
    else:
        assertRaisesPattern = unittest.TestCase.assertRaisesRegexp

    def test_incorrect_arguments(self):
        """Non-ndarray arguments to ndarray_to_sequence are rejected."""
        # NOTE(review): the expected message names sequence_to_ndarray even
        # though ndarray_to_sequence is called; presumably this mirrors the
        # library's actual message -- confirm.
        pattern = r'sequence_to_ndarray requires a numpy\.ndarray'
        for not_an_ndarray in (None, [], 1):
            with self.assertRaisesPattern(bsonnumpy.error, pattern):
                bsonnumpy.ndarray_to_sequence(not_an_ndarray)

        with self.assertRaises(TypeError):
            bsonnumpy.ndarray_to_sequence(10, 10)

    def test_empty(self):
        """An empty document in a raw batch does not match the dtype."""
        # Builtin float replaces np.float, an alias removed in NumPy 1.24.
        dtype = np.dtype([('x', np.int32), ('y', float)])
        batch = b''.join([
            bson.BSON.encode({"x": 1, "y": 1.1}),
            bson.BSON.encode({}),
            bson.BSON.encode({"x": 3, "y": 1.3}),
        ])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'document does not match dtype'):
            bsonnumpy.sequence_to_ndarray([batch], dtype, 3)

    def test_raw_batch(self):
        """Concatenated documents in one bytes object load as separate rows."""
        # Builtin float replaces np.float, an alias removed in NumPy 1.24.
        dtype = np.dtype([('x', np.int32), ('y', float)])

        # A variety of lengths.
        batch = b''.join([
            bson.BSON.encode({"x": 1, "y": 1.1}),
            bson.BSON.encode({"x": 2, "y": 1.2, "extra key": "foobar"}),
            bson.BSON.encode({"x": 3, "y": 1.3}),
        ])

        result = bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
        ndarray = np.array([(1, 1.1), (2, 1.2), (3, 1.3)], dtype)
        np.testing.assert_array_equal(result, ndarray)

        dtype = np.dtype([('x', np.int32), ('y', float), ('z', np.int32)])

        # A variety of orders.
        batch = b''.join([
            bson.BSON.encode(SON([("x", 1), ("y", 1.1), ("z", 4)])),
            bson.BSON.encode(SON([("x", 2), ("z", 5), ("y", 1.2)])),
            bson.BSON.encode(SON([("z", 6), ("x", 3), ("y", 1.3)])),
        ])

        result = bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
        ndarray = np.array([(1, 1.1, 4), (2, 1.2, 5), (3, 1.3, 6)], dtype)
        np.testing.assert_array_equal(result, ndarray)

    def test_dimensions_limit(self):
        """A dtype nested beyond 32 levels is rejected."""
        # Make a deeply-nested dtype([('x', dtype([('x', dtype([('x', ...
        dtype = np.dtype([('y', np.int32)])
        for _ in range(31):
            dtype = np.dtype([('x', dtype)])

        # No error.
        bsonnumpy.sequence_to_ndarray([], dtype, 0)

        # One more level.
        dtype = np.dtype([('x', dtype)])
        with self.assertRaisesPattern(bsonnumpy.error, r'exceeds 32 levels'):
            bsonnumpy.sequence_to_ndarray([], dtype, 0)
def test__dict_to_bson(self):
    """``_dict_to_bson`` produces the same bytes as the public ``encode``."""
    document = {'average': Decimal('56.47')}
    via_public_api = encode(document, codec_options=self.codecopts)
    via_private_api = _dict_to_bson(document, False, self.codecopts)
    self.assertEqual(via_private_api, via_public_api)