def _op_msg_no_header(flags, command, identifier, docs, opts):
    """Build the body of an OP_MSG message, without the message header.

    The command document always goes into a type 0 payload. When
    ``identifier`` is truthy, ``docs`` are additionally packed into one
    type 1 payload named by ``identifier``.

    Note: no batch splitting happens here; callers can only check the total
    message size *after* the whole message has been generated.

    Returns a ``(message, total_size, max_doc_size)`` tuple where
    ``total_size`` is the encoded command length plus the type 1 section
    size (if any) and ``max_doc_size`` is the longest encoded document in
    ``docs`` (0 when there is no type 1 payload).
    """
    # Payload 0: the command document, encoded without checking keys.
    command_bson = _dict_to_bson(command, False, opts)
    prefix = _pack_op_msg_flags_type(flags, 0)
    if not identifier:
        return b"".join([prefix, command_bson]), len(command_bson), 0

    # Payload 1: section size, identifier C string, then the documents.
    section_name = _make_c_string(identifier)
    doc_payloads = [_dict_to_bson(doc, False, opts) for doc in docs]
    doc_lengths = [len(payload) for payload in doc_payloads]
    # The size field counts itself (4 bytes) plus everything after it.
    section_size = 4 + len(section_name) + sum(doc_lengths)
    largest_doc = max(doc_lengths)

    parts = [prefix, command_bson, _pack_byte(1), _pack_int(section_size),
             section_name]
    parts.extend(doc_payloads)
    return b"".join(parts), len(command_bson) + section_size, largest_doc
Esempio n. 2
0
    def test_incorrect_sub_dtype4(self):
        # Sub document not a document: "q" carries first a scalar and then
        # a list; both must be rejected with the same "expected subdoc"
        # error.
        for wrong_q in (10, [10, 11, 12]):
            malformed = bson.SON([
                ("x", bson.SON([("y", 0), ("z", 0)])),
                ("q", wrong_q),
            ])

            candidates = self.raw_docs[:9]
            candidates.append(bson._dict_to_bson(
                malformed, False, bson.DEFAULT_CODEC_OPTIONS))

            with self.assertRaisesPattern(
                    bsonnumpy.error,
                    "invalid document: expected subdoc from dtype,"
                    " got other type"):
                bsonnumpy.sequence_to_ndarray(
                    candidates, self.dtype_sub, 10)
def _batched_op_msg_impl(operation, command, docs, ack, opts, ctx, buf):
    """Create a batched OP_MSG write.

    Writes the flag bits, a type 0 section holding ``command`` and a type 1
    section holding as many of ``docs`` as fit into ``buf``. Encoding stops
    when the next document would push the buffer position past
    ``ctx.max_message_size`` or when ``ctx.max_write_batch_size`` documents
    have been written.

    Returns a ``(to_send, length)`` tuple: the documents that were written
    to ``buf`` and the buffer position after the last one.

    Raises ``InvalidOperation`` when ``operation`` is not a key of
    ``_OP_MSG_MAP``; oversized documents are reported through
    ``_raise_document_too_large``.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    max_message_size = ctx.max_message_size

    # All-zero flag bits when acknowledged, otherwise 0x02 in the first byte.
    flags = b"\x00\x00\x00\x00" if ack else b"\x02\x00\x00\x00"
    # Flags
    buf.write(flags)

    # Type 0 Section
    buf.write(b"\x00")
    buf.write(_dict_to_bson(command, False, opts))

    # Type 1 Section
    buf.write(b"\x01")
    size_location = buf.tell()
    # Save space for size; it is back-filled once all documents are written.
    buf.write(b"\x00\x00\x00\x00")
    try:
        buf.write(_OP_MSG_MAP[operation])
    except KeyError:
        raise InvalidOperation("Unknown command")

    to_send = []
    idx = 0
    for doc in docs:
        # Encode the current operation
        value = _dict_to_bson(doc, False, opts)
        doc_length = len(value)
        new_message_size = buf.tell() + doc_length
        # Does first document exceed max_message_size?
        doc_too_large = idx == 0 and (new_message_size > max_message_size)
        # When OP_MSG is used unacknowledged we have to check
        # document size client side or applications won't be notified.
        # Otherwise we let the server deal with documents that are too large
        # since ordered=False causes those documents to be skipped instead of
        # halting the bulk write operation.
        unacked_doc_too_large = not ack and (doc_length > max_bson_size)
        if doc_too_large or unacked_doc_too_large:
            write_op = list(_FIELD_MAP.keys())[operation]
            _raise_document_too_large(write_op, len(value), max_bson_size)
        # We have enough data, return this batch.
        if new_message_size > max_message_size:
            break
        buf.write(value)
        to_send.append(doc)
        idx += 1
        # We have enough documents, return this batch.
        if idx == max_write_batch_size:
            break

    # Write type 1 section size (measured from the size field itself).
    length = buf.tell()
    buf.seek(size_location)
    buf.write(_pack_int(length - size_location))

    return to_send, length
def _query_impl(options, collection_name, num_to_skip, num_to_return, query,
                field_selector, opts, check_keys):
    """Build an OP_QUERY message body.

    Returns a ``(message, max_bson_size)`` tuple, where ``max_bson_size`` is
    the larger of the encoded query and the encoded field selector (the
    selector counts as empty when ``field_selector`` is falsy).
    """
    query_bson = _dict_to_bson(query, check_keys, opts)
    # The field selector is optional and is never key-checked.
    selector_bson = (
        _dict_to_bson(field_selector, False, opts) if field_selector else b"")
    largest = max(len(query_bson), len(selector_bson))
    pieces = (
        _pack_int(options),
        _make_c_string(collection_name),
        _pack_int(num_to_skip),
        _pack_int(num_to_return),
        query_bson,
        selector_bson,
    )
    return b"".join(pieces), largest
Esempio n. 5
0
    def test_array_scalar_load2(self):
        # Test sub arrays with documents as elements: loading them is
        # expected to fail with an "unsupported BSON type" error.
        def doc_pair(first_key, second_key, value):
            return [
                bson.SON([(first_key, value), (second_key, value)]),
                bson.SON([(first_key, -value), (second_key, -value)]),
            ]

        encoded = []
        for i in range(2, 4):
            nested = bson.SON(
                [('x', [doc_pair('a', 'b', i), doc_pair('c', 'd', i)])])
            encoded.append(bson._dict_to_bson(
                nested, False, bson.DEFAULT_CODEC_OPTIONS))

        inner_dtype = np.dtype(([('a', 'int32'), ('b', 'int32')], 2))
        dtype = np.dtype([('x', np.dtype((inner_dtype, 2)))])

        # Built as in the original test even though the result is not
        # compared against anything.
        rows = []
        for i in range(2, 4):
            cell = ([(i, i), (-i, -i)],)
            rows.append([cell, ([(i, i), (-i, -i)],)])
        ndarray = np.array(rows, dtype)

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'unsupported BSON type: unknown'):
            bsonnumpy.sequence_to_ndarray(encoded, dtype, 2)
Esempio n. 6
0
 def test_null(self):
     # A BSON Null value must be rejected when read into a '<V10' field.
     payload = bson._dict_to_bson(
         {"x": None}, True, bson.DEFAULT_CODEC_OPTIONS)
     with self.assertRaisesPattern(
             bsonnumpy.error, r'unsupported BSON type: Null'):
         source = iter([payload])
         bsonnumpy.sequence_to_ndarray(
             source, np.dtype([('x', '<V10')]), 1)
Esempio n. 7
0
    def test_array_scalar_load4(self):
        # Test documents with multiple levels of sub documents: loading
        # them is expected to fail with an "unsupported BSON type" error.
        def doc_pair(first_key, second_key, value):
            return [
                bson.SON([(first_key, value), (second_key, value)]),
                bson.SON([(first_key, -value), (second_key, -value)]),
            ]

        encoded = []
        for i in range(10):
            nested = bson.SON(
                [('x', [doc_pair('a', 'b', i), doc_pair('c', 'd', i)])])
            encoded.append(bson._dict_to_bson(
                nested, False, bson.DEFAULT_CODEC_OPTIONS))

        leaf_dtype = np.dtype([('q', 'int32')])
        pair_dtype = np.dtype(([('a', leaf_dtype), ('b', leaf_dtype)], 2))
        dtype = np.dtype([('x', np.dtype((pair_dtype, 2)))])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'unsupported BSON type: unknown'):
            bsonnumpy.sequence_to_ndarray(encoded, dtype, 4)
Esempio n. 8
0
 def encode(self):
     """
     Serialize this message: the encoded header followed by the BSON body.

     The header's ``length`` field is set to the body length before the
     header is encoded.
     """
     payload = _dict_to_bson(self, False)
     header = self.hdr
     header.length = len(payload)
     return header.encode() + payload
Esempio n. 9
0
    def encrypt(self, database, cmd, check_keys, codec_options):
        """Encrypt a MongoDB command.

        :Parameters:
          - `database`: The database for this command.
          - `cmd`: A command document.
          - `check_keys`: If True, check `cmd` for invalid keys.
          - `codec_options`: The CodecOptions to use while encoding `cmd`.

        :Returns:
          The encrypted command to execute.
        """
        self._check_closed()
        # Workaround for $clusterTime which is incompatible with
        # check_keys: take it out before encoding, put it back afterwards.
        cluster_time = None
        if check_keys:
            cluster_time = cmd.pop('$clusterTime', None)

        raw_cmd = _dict_to_bson(cmd, check_keys, codec_options)
        size_limit = _MAX_ENC_BSON_SIZE + _COMMAND_OVERHEAD
        raw_size = len(raw_cmd)
        if raw_size > size_limit:
            raise _raise_document_too_large(
                next(iter(cmd)), raw_size, size_limit)

        with _wrap_encryption_errors():
            ciphertext = self._auto_encrypter.encrypt(database, raw_cmd)
            # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
            result = _inflate_bson(ciphertext, DEFAULT_RAW_BSON_OPTIONS)
            if cluster_time:
                result['$clusterTime'] = cluster_time
            return result
Esempio n. 10
0
    def encrypt(self, database, cmd, check_keys, codec_options):
        """Encrypt a MongoDB command.

        :Parameters:
          - `database`: The database for this command.
          - `cmd`: A command document.
          - `check_keys`: If True, check `cmd` for invalid keys.
          - `codec_options`: The CodecOptions to use while encoding `cmd`.

        :Returns:
          The encrypted command to execute.
        """
        self._check_closed()
        # Workaround for $clusterTime which is incompatible with
        # check_keys: take it out before encoding, put it back afterwards.
        cluster_time = None
        if check_keys:
            cluster_time = cmd.pop('$clusterTime', None)

        raw_cmd = _dict_to_bson(cmd, check_keys, codec_options)
        with _wrap_encryption_errors():
            ciphertext = self._auto_encrypter.encrypt(database, raw_cmd)
            # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
            result = _inflate_bson(ciphertext, DEFAULT_RAW_BSON_OPTIONS)
            if cluster_time:
                result['$clusterTime'] = cluster_time
            return result
    def test_deeply_nested_array(self):
        # arrays of length 1 are maintained when they are within another array
        def check_round_trip(field_specs, pairs):
            # Encode one document, load it with the given dtype and compare
            # against an ndarray built from the very same field values.
            dtype = np.dtype(field_specs)
            raw = bson._dict_to_bson(
                bson.SON(pairs), False, bson.DEFAULT_CODEC_OPTIONS)
            result = bsonnumpy.sequence_to_ndarray([raw], dtype, 1)
            self.assertEqual(dtype, result.dtype)
            expected = np.array(
                [tuple(value for _, value in pairs)], dtype)
            self.assertTrue(np.array_equal(result, expected))

        check_round_trip(
            [("a", "(3,2,1)int32"), ("b", "(3,2,1)int32")],
            [("a", [[[9], [9]], [[8], [8]], [[7], [7]]]),
             ("b", [[[6], [6]], [[5], [5]], [[4], [4]]])])

        check_round_trip(
            [("a", "(3,1)int32"), ("b", "(3,1)int32"), ("c", "(3,1)int32")],
            [("a", [[9], [8], [7]]),
             ("b", [[6], [5], [4]]),
             ("c", [[3], [2], [1]])])

        check_round_trip(
            [("a", "2int32")],
            [("a", [7, 7])])

        check_round_trip(
            [("a", "(2,1,1,1)int32")],
            [("a", [[[[99]]], [[[88]]]])])
Esempio n. 12
0
 def _write(self, obj):
     # Anything that is not a dict is wrapped in a {'value': obj} document
     # and written through the same path.
     if not isinstance(obj, dict):
         self._write({'value': obj})
         return
     self._validate_write(obj)
     encoded = _dict_to_bson(obj, False)
     self.fh.write(encoded)
Esempio n. 13
0
def _delete(collection_name, spec, opts, flags):
    """Build an OP_DELETE message body.

    Returns a ``(message, encoded_spec_length)`` tuple.
    """
    spec_bson = _dict_to_bson(spec, False, opts)  # Uses extensions.
    pieces = (
        _ZERO_32,
        _make_c_string(collection_name),
        _pack_int(flags),
        spec_bson,
    )
    message = b"".join(pieces)
    return message, len(spec_bson)
Esempio n. 14
0
 def encode(self):
     """
     Returns the encoded header followed by the encoded body.

     When a body is set it is re-encoded and the header length refreshed;
     otherwise the previously stored body bytes are reused as they are.
     """
     body = self._body
     if body is not None:
         fresh = _dict_to_bson(body, False)
         self._bodybytes = fresh
         self.hdr.length = len(fresh)
     header_bytes = self.hdr.encode()
     return header_bytes + self._bodybytes
Esempio n. 15
0
 def compare_seq_to_ndarray_result(self, np_type, document):
     # Encode `document`, load it back with `np_type` and check that every
     # key of the document round-trips to an equal value.
     raw = bson._dict_to_bson(document, False, bson.DEFAULT_CODEC_OPTIONS)
     dtype = np.dtype(np_type)
     result = bsonnumpy.sequence_to_ndarray([raw], dtype, 1)
     self.assertEqual(result.dtype, dtype)
     for key in document:
         actual = result[0][key]
         wanted = document[key]
         failure = "Comparison failed for type %s: %s != %s" % (
             dtype, actual, wanted)
         self.assertEqual(actual, wanted, failure)
Esempio n. 16
0
    def _test_error(self, value, bson_type_name, codes):
        # Loading {'x': value} into a field of each dtype code in `codes`
        # must fail with a "cannot convert <bson_type_name> to dtype" error.
        payload = bson._dict_to_bson(
            {'x': value}, True, bson.DEFAULT_CODEC_OPTIONS)

        for code in codes:
            field_dtype = np.dtype([('x', code)])
            message = "cannot convert %s to dtype" % bson_type_name

            with self.assertRaisesPattern(bsonnumpy.error, message):
                source = iter([payload])
                bsonnumpy.sequence_to_ndarray(source, field_dtype, 1)
Esempio n. 17
0
    def test(self):
        # `value`, `dtype` and `type_name` are not defined in this
        # function; they come from the surrounding scope.
        payload = bson._dict_to_bson(
            {"a": value},
            True,  # check_keys
            bson.DEFAULT_CODEC_OPTIONS)

        with self.assertRaises(bsonnumpy.error) as caught:
            field_dtype = np.dtype([("a", dtype)])
            bsonnumpy.sequence_to_ndarray([payload], field_dtype, 1)

        wanted = "unsupported BSON type: %s" % type_name
        self.assertIn(wanted, str(caught.exception))
Esempio n. 18
0
 def test_incorrect_sub_dtype_array7(self):
     # Sub array too short
     def ds(count):
         return 'd' * count

     short_doc = bson.SON([
         ("x", [[ds(1), ds(2)], [ds(4), ds(5)]]),
         ("y", [[ds(7), ds(8)], [ds(10), ds(11)]]),
     ])
     candidates = self.raw_docs[:3]
     candidates.append(bson._dict_to_bson(
         short_doc, False, bson.DEFAULT_CODEC_OPTIONS))
     with self.assertRaisesPattern(
             bsonnumpy.error,
             "invalid document: array is of incorrect length"):
         bsonnumpy.sequence_to_ndarray(candidates, self.dtype, 4)
Esempio n. 19
0
 def test_incorrect_sub_dtype_array1(self):
     # Top document missing key: the second field is named "bad_key".
     def ds(count):
         return 'd' * count

     wrong_key_doc = bson.SON([
         ("x", [[ds(1), ds(2), ds(3)],
                [ds(4), ds(5), ds(6)]]),
         ("bad_key", [[ds(7), ds(7), ds(9)],
                      [ds(10), ds(11), ds(12)]]),
     ])
     candidates = self.raw_docs[:3]
     candidates.append(bson._dict_to_bson(
         wrong_key_doc, False, bson.DEFAULT_CODEC_OPTIONS))
     with self.assertRaisesPattern(bsonnumpy.error,
                                   "document does not match dtype"):
         bsonnumpy.sequence_to_ndarray(candidates, self.dtype, 4)
Esempio n. 20
0
    def test_incorrect_sub_dtype3(self):
        # Sub document missing key: "x" holds "bad" where "q" holds "y".
        broken_sub = bson.SON([("bad", 0), ("z", 0)])
        intact_sub = bson.SON([("y", 0), ("z", 0)])
        malformed = bson.SON([("x", broken_sub), ("q", intact_sub)])

        candidates = self.raw_docs[:9]
        candidates.append(bson._dict_to_bson(
            malformed, False, bson.DEFAULT_CODEC_OPTIONS))

        with self.assertRaisesPattern(bsonnumpy.error,
                                      "document does not match dtype"):
            bsonnumpy.sequence_to_ndarray(candidates, self.dtype_sub, 10)
Esempio n. 21
0
    def test_incorrect_sub_dtype_array2(self):
        # Top-level array not array: "y" is a plain string.
        def ds(count):
            return 'd' * count

        malformed = bson.SON([
            ("x", [[ds(1), ds(2), ds(3)],
                   [ds(4), ds(5), ds(6)]]),
            ("y", 'not an array'),
        ])
        candidates = self.raw_docs[:3]
        candidates.append(bson._dict_to_bson(
            malformed, False, bson.DEFAULT_CODEC_OPTIONS))

        with self.assertRaisesPattern(
                bsonnumpy.error,
                "invalid document: expected list from dtype, got other type"):
            bsonnumpy.sequence_to_ndarray(candidates, self.dtype, 4)
Esempio n. 22
0
def encode_dict_list(products, output_bson_path, total=None, overwrite=False):
    """Write every product dict to ``output_bson_path`` as concatenated BSON.

    :param products: iterable of dicts; may be a generator or other
        iterable without a length.
    :param output_bson_path: path of the file to create.
    :param total: number of products, used for logging and the progress
        bar. When falsy it is taken from ``len(products)`` if available;
        otherwise the progress bar runs without a known total.
    :param overwrite: when ``False`` and the output file already exists,
        nothing is written.
    """
    if os.path.exists(output_bson_path) and overwrite is False:
        logging.info('already exists ({})'.format(output_bson_path))
        return

    if not total:
        # Generators have no len(); fall back to an open-ended progress bar
        # instead of failing with TypeError.
        try:
            total = len(products)
        except TypeError:
            total = None

    logging.info('write {} products to {}'.format(total, output_bson_path))
    with open(output_bson_path, 'wb') as writer:
        for prod in tqdm(products,
                         unit='products',
                         total=total,
                         ascii=True):
            writer.write(
                bson._dict_to_bson(prod, False, bson.DEFAULT_CODEC_OPTIONS))
Esempio n. 23
0
    def __init__(self, client, opts):
        """Create a _Encrypter for a client.

        :Parameters:
          - `client`: The encrypted MongoClient.
          - `opts`: The encrypted client's :class:`AutoEncryptionOpts`.
        """
        raw_schema_map = opts._schema_map
        schema_map = None
        if raw_schema_map is not None:
            schema_map = _dict_to_bson(raw_schema_map, False, _DATA_KEY_OPTS)
        self._bypass_auto_encryption = opts._bypass_auto_encryption
        self._internal_client = None

        def _get_internal_client(encrypter, mongo_client):
            # Unlimited pool size: the caller's client can be used directly.
            if mongo_client.max_pool_size is None:
                return mongo_client
            # Limited pool size: use one internal client, created on the
            # first request and reused afterwards.
            if encrypter._internal_client is None:
                encrypter._internal_client = mongo_client._duplicate(
                    minPoolSize=0, auto_encryption_opts=None)
            return encrypter._internal_client

        key_vault_client = opts._key_vault_client
        if key_vault_client is None:
            key_vault_client = _get_internal_client(self, client)

        # No metadata client is set up when auto encryption is bypassed.
        metadata_client = None
        if not opts._bypass_auto_encryption:
            metadata_client = _get_internal_client(self, client)

        db_name, coll_name = opts._key_vault_namespace.split('.', 1)
        key_vault_coll = key_vault_client[db_name][coll_name]

        mongocryptd_client = MongoClient(
            opts._mongocryptd_uri,
            connect=False,
            serverSelectionTimeoutMS=_MONGOCRYPTD_TIMEOUT_MS)

        io_callbacks = _EncryptionIO(
            metadata_client, key_vault_coll, mongocryptd_client, opts)
        crypt_options = MongoCryptOptions(opts._kms_providers, schema_map)
        self._auto_encrypter = AutoEncrypter(io_callbacks, crypt_options)
        self._closed = False
Esempio n. 24
0
    def test_array_scalar_load00(self):
        # Test documents whose single field is a flat array of four ints.
        values = range(2, 4)
        encoded = []
        for i in values:
            doc = bson.SON([('x', [i, i, i, i])])
            encoded.append(bson._dict_to_bson(
                doc, False, bson.DEFAULT_CODEC_OPTIONS))
        dtype = np.dtype([('x', '4int32')])

        expected = np.array([([i, i, i, i],) for i in values], dtype)

        # Correct dtype
        loaded = bsonnumpy.sequence_to_ndarray(encoded, dtype, 2)

        self.assertTrue(np.array_equal(expected, loaded))
Esempio n. 25
0
    def test_incorrect_sub_dtype2(self):
        # Top document has extra key: "x" is not part of the dtype, so only
        # "y" is loaded and "x" is not a field of the result.
        data = bson._dict_to_bson({
            "x": 12,
            "y": 13
        }, True, bson.DEFAULT_CODEC_OPTIONS)

        # Builtin int replaces np.int, an alias for the same type that has
        # been removed from NumPy and now raises AttributeError.
        ndarray = bsonnumpy.sequence_to_ndarray([data],
                                                np.dtype([("y", int)]), 1)

        self.assertEqual(1, len(ndarray))
        self.assertEqual(13, ndarray[0]["y"])

        with self.assertRaises(ValueError):
            ndarray[0]["x"]
Esempio n. 26
0
    def __init__(self, io_callbacks, opts):
        """Encrypts and decrypts MongoDB commands.

        This class is used to support automatic encryption and decryption of
        MongoDB commands.

        :Parameters:
          - `io_callbacks`: A :class:`MongoCryptCallback`.
          - `opts`: The encrypted client's :class:`AutoEncryptionOpts`.
        """
        # The schema map is optional; when present it is handed on as BSON.
        raw_schema_map = opts._schema_map
        schema_map = None
        if raw_schema_map is not None:
            schema_map = _dict_to_bson(raw_schema_map, False, _DATA_KEY_OPTS)
        crypt_options = MongoCryptOptions(opts._kms_providers, schema_map)
        self._auto_encrypter = AutoEncrypter(io_callbacks, crypt_options)
        self._bypass_auto_encryption = opts._bypass_auto_encryption
Esempio n. 27
0
    def collection_info(self, database, filter):
        """Get the collection info for a namespace.

        The returned collection info is passed to libmongocrypt which reads
        the JSON schema.

        :Parameters:
          - `database`: The database on which to run listCollections.
          - `filter`: The filter to pass to listCollections.

        :Returns:
          The first document from the listCollections command response as
          BSON, or None when the cursor yields no documents.
        """
        target_db = self.client_ref()[database]
        raw_filter = RawBSONDocument(filter)
        with target_db.list_collections(filter=raw_filter) as cursor:
            # Only the first document is of interest.
            for first_doc in cursor:
                return _dict_to_bson(first_doc, False, _DATA_KEY_OPTS)
        return None
    def encrypt(self, database, cmd, codec_options):
        """Encrypt a MongoDB command.

        :Parameters:
          - `database`: The database for this command.
          - `cmd`: A command document.
          - `codec_options`: The CodecOptions to use while encoding `cmd`.

        :Returns:
          The encrypted command to execute.
        """
        self._check_closed()
        raw_cmd = _dict_to_bson(cmd, False, codec_options)
        with _wrap_encryption_errors():
            ciphertext = self._auto_encrypter.encrypt(database, raw_cmd)
            # TODO: PYTHON-1922 avoid decoding the encrypted_cmd.
            return _inflate_bson(ciphertext, DEFAULT_RAW_BSON_OPTIONS)
Esempio n. 29
0
    def test_string_length(self):
        # Loading the string "abc" into a V<n> field yields its first n
        # bytes, padded with a NUL byte when n exceeds the string length.
        payload = bson._dict_to_bson({"x": "abc"}, True,
                                     bson.DEFAULT_CODEC_OPTIONS)

        cases = (
            ("V1", b"a"),
            ("V2", b"ab"),
            ("V3", b"abc"),
            ("V4", b"abc\0"),
        )
        for code, wanted in cases:
            loaded = bsonnumpy.sequence_to_ndarray(
                iter([payload]), np.dtype([("x", code)]), 1)
            self.assertEqual(loaded[0]["x"].tobytes(), wanted)
Esempio n. 30
0
    def test_array_scalar_load3(self):
        # Test sub arrays with documents that have arrays: loading them is
        # expected to fail with an "unsupported BSON type" error.
        def array_doc(value):
            quad = [value, value, value, value]
            return bson.SON([('a', quad), ('b', list(quad))])

        encoded = []
        for i in range(10):
            nested = bson.SON([('x', [array_doc(i), array_doc(-i)])])
            encoded.append(bson._dict_to_bson(
                nested, False, bson.DEFAULT_CODEC_OPTIONS))

        element_dtype = np.dtype(([('a', '4int32'), ('b', '4int32')], 2))
        dtype = np.dtype([('x', element_dtype)])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'unsupported BSON type: Sub-document'):
            bsonnumpy.sequence_to_ndarray(encoded, dtype, 4)
Esempio n. 31
0
def _element_to_bson(key, value, check_keys, uuid_subtype):
    """Encode one key/value pair, applying a registered Extension pickler.

    A pickler registered for the value's class (or, when the value has
    ``__getstate__``, for any class in its MRO) converts the value first;
    dict results are tagged with a ``'__customtype__'`` entry. Lists and
    tuples are encoded here as BSON arrays; everything else is passed to
    ``_bson_element_to_bson``.
    """
    # The lookup always uses value.__class__.
    encoder = Extension.getpickler(value.__class__)
    if not encoder and hasattr(value, '__getstate__'):
        # Fall back to the first class in the MRO that has a pickler.
        encoder = next(
            (found for found in map(Extension.getpickler,
                                    type(value).__mro__) if found),
            None)

    if encoder:
        code, value = encoder(value)
        if isinstance(value, dict):
            assert '__customtype__' not in value
            # Tag a copy so the pickler's own dict is left untouched.
            value = value.copy()
            value['__customtype__'] = code

    if not isinstance(value, (list, tuple)):
        return _bson_element_to_bson(key, value, check_keys, uuid_subtype)

    # The array is encoded as a document keyed by the stringified index.
    name = bson._make_c_string(key)
    indexed = OrderedDict(
        (str(position), item) for position, item in enumerate(value))
    body = bson._dict_to_bson(indexed, check_keys, uuid_subtype, False)
    return bson.BSONARR + name + body
 def test__dict_to_bson(self):
     # _dict_to_bson must produce the same bytes as BSON.encode when both
     # use this test case's codec options.
     sample = {'average': Decimal('56.47')}
     expected = BSON.encode(sample, codec_options=self.codecopts)
     actual = _dict_to_bson(sample, False, self.codecopts)
     self.assertEqual(actual, expected)
class TestNdarrayFlat(TestToNdarray):
    # Fixtures: ten flat documents {"x": i, "y": -i} and the matching ndarray.
    dtype = np.dtype([('x', np.int32), ('y', np.int32)])
    bson_docs = [
        bson._dict_to_bson(bson.SON([("x", i), ("y", -i)]), False, DEFAULT)
        for i in range(10)
    ]

    ndarray = np.array([(i, -i) for i in range(10)], dtype=dtype)
    # Use whichever regex-assertion name this unittest version provides.
    if hasattr(unittest.TestCase, 'assertRaisesRegex'):
        assertRaisesPattern = unittest.TestCase.assertRaisesRegex
    else:
        assertRaisesPattern = unittest.TestCase.assertRaisesRegexp

    def test_incorrect_arguments(self):
        # Non-ndarray arguments are rejected with the same message.
        for not_an_ndarray in (None, [], 1):
            with self.assertRaisesPattern(
                    bsonnumpy.error, r'sequence_to_ndarray requires a numpy\.'
                    r'ndarray'):
                bsonnumpy.ndarray_to_sequence(not_an_ndarray)
        with self.assertRaises(TypeError):
            bsonnumpy.ndarray_to_sequence(10, 10)

    def test_empty(self):
        # Builtin float replaces np.float, an alias for the same type that
        # has been removed from NumPy and now raises AttributeError.
        dtype = np.dtype([('x', np.int32), ('y', float)])
        batch = b''.join([
            bson.BSON.encode({
                "x": 1,
                "y": 1.1
            }),
            bson.BSON.encode({}),
            bson.BSON.encode({
                "x": 3,
                "y": 1.3
            }),
        ])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'document does not match dtype'):
            bsonnumpy.sequence_to_ndarray([batch], dtype, 3)

    def test_raw_batch(self):
        # Builtin float replaces the removed np.float alias.
        dtype = np.dtype([('x', np.int32), ('y', float)])

        # A variety of lengths.
        batch = b''.join([
            bson.BSON.encode({
                "x": 1,
                "y": 1.1
            }),
            bson.BSON.encode({
                "x": 2,
                "y": 1.2,
                "extra key": "foobar"
            }),
            bson.BSON.encode({
                "x": 3,
                "y": 1.3
            }),
        ])

        result = bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
        ndarray = np.array([(1, 1.1), (2, 1.2), (3, 1.3)], dtype)
        np.testing.assert_array_equal(result, ndarray)

        dtype = np.dtype([('x', np.int32), ('y', float), ('z', np.int32)])

        # A variety of orders.
        batch = b''.join([
            bson.BSON.encode(SON([("x", 1), ("y", 1.1), ("z", 4)])),
            bson.BSON.encode(SON([("x", 2), ("z", 5), ("y", 1.2)])),
            bson.BSON.encode(SON([("z", 6), ("x", 3), ("y", 1.3)]))
        ])

        result = bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
        ndarray = np.array([(1, 1.1, 4), (2, 1.2, 5), (3, 1.3, 6)], dtype)
        np.testing.assert_array_equal(result, ndarray)

    def test_dimensions_limit(self):
        # Make a deeply-nested dtype([('x', dtype([('x', dtype([('x', ...
        dtype = np.dtype([('y', np.int32)])
        for _ in range(31):
            dtype = np.dtype([('x', dtype)])

        # No error.
        bsonnumpy.sequence_to_ndarray([], dtype, 0)

        # One more level.
        dtype = np.dtype([('x', dtype)])
        with self.assertRaisesPattern(bsonnumpy.error, r'exceeds 32 levels'):
            bsonnumpy.sequence_to_ndarray([], dtype, 0)
 def test__dict_to_bson(self):
     # _dict_to_bson must produce the same bytes as encode() when both use
     # this test case's codec options.
     sample = {'average': Decimal('56.47')}
     expected = encode(sample, codec_options=self.codecopts)
     actual = _dict_to_bson(sample, False, self.codecopts)
     self.assertEqual(actual, expected)