Ejemplo n.º 1
0
    def test_array_scalar_load2(self):
        # Sub-arrays whose elements are documents: 'x' holds two lists, each
        # containing two sub-documents (keys a/b in the first list, c/d in
        # the second). Loading them is expected to fail with
        # "unsupported BSON type: unknown".
        son_docs = [
            bson.SON(
                [('x', [
                    [
                        bson.SON([('a', i), ('b', i)]),
                        bson.SON([('a', -i), ('b', -i)])
                    ],
                    [
                        bson.SON([('c', i), ('d', i)]),
                        bson.SON([('c', -i), ('d', -i)])
                    ],
                ])]) for i in range(2, 4)]
        raw_docs = [bson._dict_to_bson(
            doc, False, bson.DEFAULT_CODEC_OPTIONS) for doc in son_docs]

        # 'x' is declared as two nested length-2 sub-arrays of (a, b) int32
        # records.
        sub_sub_dtype = np.dtype(([('a', 'int32'), ('b', 'int32')], 2))
        sub_dtype = np.dtype((sub_sub_dtype, 2))
        dtype = np.dtype([('x', sub_dtype)])

        # No reference ndarray is built: the call below must raise, so there
        # is no result to compare against.
        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'unsupported BSON type: unknown'):
            bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 2)
Ejemplo n.º 2
0
    def test_incorrect_sub_dtype4(self):
        # Sub document not a document: 'q' is first a plain int, then a list.
        # Presumably self.dtype_sub declares 'q' as a sub-document (confirm
        # against the fixture); both cases must raise the same error.
        expected_message = ("invalid document: expected subdoc from dtype,"
                            " got other type")

        for q_value in (10, [10, 11, 12]):
            bad_doc = bson.SON([("x", bson.SON([("y", 0), ("z", 0)])),
                                ("q", q_value)])

            # Nine fixture documents followed by the malformed one.
            bad_raw_docs = self.raw_docs[:9] + [
                bson._dict_to_bson(bad_doc, False,
                                   bson.DEFAULT_CODEC_OPTIONS)]

            with self.assertRaisesPattern(bsonnumpy.error, expected_message):
                bsonnumpy.sequence_to_ndarray(bad_raw_docs, self.dtype_sub,
                                              10)
Ejemplo n.º 3
0
 def test_null(self):
     # A document whose 'x' value is None must be rejected with an
     # "unsupported BSON type: Null" error when loaded into a V10 field.
     encoded = bson._dict_to_bson({"x": None}, True,
                                  bson.DEFAULT_CODEC_OPTIONS)
     void_dtype = np.dtype([('x', '<V10')])
     documents = iter([encoded])
     with self.assertRaisesPattern(bsonnumpy.error,
                                   r'unsupported BSON type: Null'):
         bsonnumpy.sequence_to_ndarray(documents, void_dtype, 1)
Ejemplo n.º 4
0
    def test_array_scalar_load4(self):
        # Test documents with multiple levels of sub documents. The call is
        # expected to fail with "unsupported BSON type: unknown".
        def signed_pair(first_key, second_key, value):
            # Two sub-documents: one with +value, one with -value.
            return [
                bson.SON([(first_key, value), (second_key, value)]),
                bson.SON([(first_key, -value), (second_key, -value)]),
            ]

        raw_docs = []
        for i in range(10):
            son_doc = bson.SON([
                ('x', [signed_pair('a', 'b', i), signed_pair('c', 'd', i)]),
            ])
            raw_docs.append(bson._dict_to_bson(
                son_doc, False, bson.DEFAULT_CODEC_OPTIONS))

        # dtype nests a one-field record ('q') inside the (a, b) record,
        # which itself sits inside two levels of length-2 sub-arrays.
        leaf_dtype = np.dtype([('q', 'int32')])
        pair_dtype = np.dtype(([('a', leaf_dtype), ('b', leaf_dtype)], 2))
        dtype = np.dtype([('x', np.dtype((pair_dtype, 2)))])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'unsupported BSON type: unknown'):
            bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 4)
Ejemplo n.º 5
0
def raw_bson_func(use_large):
    """Convert a collection to an ndarray using ``find_raw_batches()``.

    ``use_large`` indexes the module-level ``collection_names`` and
    ``dtypes`` lookups. If the collection object has no
    ``find_raw_batches`` attribute, a message is printed and nothing else
    happens. The resulting array is discarded.
    """
    collection = db[collection_names[use_large]]
    if hasattr(collection, 'find_raw_batches'):
        dtype = dtypes[use_large]
        bsonnumpy.sequence_to_ndarray(
            collection.find_raw_batches(), dtype, collection.count())
    else:
        print("Wrong PyMongo: no 'find_raw_batches' feature")
Ejemplo n.º 6
0
def bson_numpy_func(use_large):
    """Convert a collection to an ndarray from per-document raw BSON.

    The collection is opened with ``RawBSONDocument`` as its document
    class, and each document's ``.raw`` bytes are fed lazily to
    ``bsonnumpy.sequence_to_ndarray``. ``use_large`` indexes the
    module-level ``collection_names`` and ``dtypes`` lookups. The
    resulting array is discarded.
    """
    name = collection_names[use_large]
    raw_options = CodecOptions(document_class=RawBSONDocument)
    raw_coll = db.get_collection(name, codec_options=raw_options)

    cursor = raw_coll.find()
    dtype = dtypes[use_large]
    raw_documents = (doc.raw for doc in cursor)
    bsonnumpy.sequence_to_ndarray(raw_documents, dtype, raw_coll.count())
Ejemplo n.º 7
0
    def _test_error(self, value, bson_type_name, codes):
        # Encode {'x': value} once, then for every dtype code in `codes`
        # check that loading it raises
        # "cannot convert <bson_type_name> to dtype".
        encoded = bson._dict_to_bson({'x': value}, True,
                                     bson.DEFAULT_CODEC_OPTIONS)

        for type_code in codes:
            field_dtype = np.dtype([('x', type_code)])
            message = "cannot convert %s to dtype" % bson_type_name

            with self.assertRaisesPattern(bsonnumpy.error, message):
                bsonnumpy.sequence_to_ndarray(
                    iter([encoded]), field_dtype, 1)
Ejemplo n.º 8
0
    def test(self):
        # `value`, `dtype` and `type_name` come from the enclosing scope.
        # Loading {"a": value} into field dtype `dtype` must raise an error
        # whose text contains "unsupported BSON type: <type_name>".
        check_keys = True
        encoded = bson._dict_to_bson(
            {"a": value}, check_keys, bson.DEFAULT_CODEC_OPTIONS)

        with self.assertRaises(bsonnumpy.error) as raised:
            bsonnumpy.sequence_to_ndarray(
                [encoded], np.dtype([("a", dtype)]), 1)

        expected_fragment = "unsupported BSON type: %s" % type_name
        self.assertIn(expected_fragment, str(raised.exception))
Ejemplo n.º 9
0
 def test_incorrect_sub_dtype_array7(self):
     # Sub array too short: every inner list holds only two strings.
     # Expected error: "invalid document: array is of incorrect length".
     def d_rows(*row_lengths):
         # Build rows of 'd'-repeated strings with the given lengths.
         return [['d' * n for n in lengths] for lengths in row_lengths]

     bad_doc = bson.SON([("x", d_rows((1, 2), (4, 5))),
                         ("y", d_rows((7, 8), (10, 11)))])
     # Three fixture documents followed by the malformed one.
     bad_raw_docs = self.raw_docs[:3] + [
         bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS)]
     with self.assertRaisesPattern(
             bsonnumpy.error,
             "invalid document: array is of incorrect length"):
         bsonnumpy.sequence_to_ndarray(bad_raw_docs, self.dtype, 4)
Ejemplo n.º 10
0
    def test_incorrect_sub_dtype3(self):
        # Sub document missing key: the 'x' sub-document carries "bad" where
        # the 'q' sub-document carries "y".
        wrong_key_sub = bson.SON([("bad", 0), ("z", 0)])
        regular_sub = bson.SON([("y", 0), ("z", 0)])
        bad_doc = bson.SON([("x", wrong_key_sub), ("q", regular_sub)])

        # Nine fixture documents followed by the malformed one.
        bad_raw_docs = self.raw_docs[:9] + [
            bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS)]

        mismatch = "document does not match dtype"
        with self.assertRaisesPattern(bsonnumpy.error, mismatch):
            bsonnumpy.sequence_to_ndarray(bad_raw_docs, self.dtype_sub, 10)
Ejemplo n.º 11
0
 def test_incorrect_sub_dtype_array1(self):
     # Top document missing key: the second field is named "bad_key".
     # Expected error: "document does not match dtype".
     def d_rows(*row_lengths):
         # Build rows of 'd'-repeated strings with the given lengths.
         return [['d' * n for n in lengths] for lengths in row_lengths]

     bad_doc = bson.SON([
         ("x", d_rows((1, 2, 3), (4, 5, 6))),
         ("bad_key", d_rows((7, 7, 9), (10, 11, 12))),
     ])
     # Three fixture documents followed by the malformed one.
     bad_raw_docs = self.raw_docs[:3] + [
         bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS)]
     mismatch = "document does not match dtype"
     with self.assertRaisesPattern(bsonnumpy.error, mismatch):
         bsonnumpy.sequence_to_ndarray(bad_raw_docs, self.dtype, 4)
Ejemplo n.º 12
0
    def test_incorrect_sub_dtype_array2(self):
        # Top-level array not array: 'y' is a plain string.
        x_rows = [['d' * length for length in row]
                  for row in ((1, 2, 3), (4, 5, 6))]
        bad_doc = bson.SON([("x", x_rows), ("y", 'not an array')])

        # Three fixture documents followed by the malformed one.
        bad_raw_docs = self.raw_docs[:3] + [
            bson._dict_to_bson(bad_doc, False, bson.DEFAULT_CODEC_OPTIONS)]

        expected_message = (
            "invalid document: expected list from dtype, got other type")
        with self.assertRaisesPattern(bsonnumpy.error, expected_message):
            bsonnumpy.sequence_to_ndarray(bad_raw_docs, self.dtype, 4)
Ejemplo n.º 13
0
def raw_bson_func(use_large):
    """Convert a collection to an ndarray using ``find(raw_batches=True)``.

    ``use_large`` indexes the module-level ``collection_names`` and
    ``dtypes`` lookups. If ``find`` rejects the ``raw_batches`` keyword
    with a ``TypeError``, a message is printed and the function returns;
    any other ``TypeError`` propagates. The resulting array is discarded.
    """
    collection = db[collection_names[use_large]]
    try:
        batches = list(collection.find(raw_batches=True))
    except TypeError as exc:
        # Only swallow the "keyword not supported" failure.
        if "unexpected keyword argument 'raw_batches'" not in str(exc):
            raise
        print("Wrong PyMongo: no 'raw_batches' feature")
        return

    bsonnumpy.sequence_to_ndarray(
        batches, dtypes[use_large], collection.count())
Ejemplo n.º 14
0
    def test_dimensions_limit(self):
        # Make a deeply-nested dtype([('x', dtype([('x', dtype([('x', ...
        def wrap(inner):
            # Add one more level of nesting under field 'x'.
            return np.dtype([('x', inner)])

        nested = np.dtype([('y', np.int32)])
        remaining = 31
        while remaining:
            nested = wrap(nested)
            remaining -= 1

        # The base record plus 31 wrappers is accepted without error.
        bsonnumpy.sequence_to_ndarray([], nested, 0)

        # One more level must be rejected.
        too_deep = wrap(nested)
        with self.assertRaisesPattern(bsonnumpy.error, r'exceeds 32 levels'):
            bsonnumpy.sequence_to_ndarray([], too_deep, 0)
Ejemplo n.º 15
0
    def test_incorrect_dtype(self):
        # Dtype is named, but does not match documents
        mismatched = np.dtype([('a', np.int32), ('b', np.int32)])
        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'document does not match dtype'):
            bsonnumpy.sequence_to_ndarray(self.bson_docs, mismatched, 10)

        # Dtype is not named (plain int32), then dtype is a simple array:
        # both must produce the same "must include field names" message.
        unnamed_pattern = (r'dtype must include field names,'
                           r' like dtype\(\[\(\'fieldname\', numpy.int\)\]\)')
        for unnamed_spec in (np.int32, '(3,2)int32'):
            with self.assertRaisesPattern(bsonnumpy.error, unnamed_pattern):
                bsonnumpy.sequence_to_ndarray(self.bson_docs,
                                              np.dtype(unnamed_spec), 10)

        # Dtype is null or empty
        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'dtype must include field names'):
            bsonnumpy.sequence_to_ndarray(self.bson_docs, None, 1)
Ejemplo n.º 16
0
    def test_incorrect_sub_dtype5(self):
        # Sub document extra key: the dtypes below name only some fields;
        # presumably the fixture documents carry more (confirm against
        # self.raw_docs). Loading must succeed and match the reference.

        # Case 1: only top-level field 'x', with sub-fields y and z.
        inner_yz = np.dtype([('y', np.int32), ('z', np.int32)])
        only_x = np.dtype([('x', inner_yz)])
        expected = np.array([((i, i), ) for i in range(10)], dtype=only_x)
        loaded = bsonnumpy.sequence_to_ndarray(self.raw_docs, only_x, 10)
        self.assertTrue(np.array_equal(expected, loaded))

        # Case 2: fields 'x' and 'q', each with only sub-field y.
        inner_y = np.dtype([('y', np.int32)])
        x_and_q = np.dtype([('x', inner_y), ('q', inner_y)])
        expected = np.array([((i, ), (i, )) for i in range(10)],
                            dtype=x_and_q)
        loaded = bsonnumpy.sequence_to_ndarray(self.raw_docs, x_and_q, 10)
        self.assertTrue(np.array_equal(expected, loaded))
Ejemplo n.º 17
0
 def test_incorrect_arguments(self):
     # Expects iterator, dtype, count. Each case is
     # (exception type, message pattern, positional arguments).
     needs_iterator = r'sequence_to_ndarray requires an iterator'
     cases = (
         (TypeError, r'\binteger\b', (None, None, None)),
         (TypeError, needs_iterator, (0, self.dtype, 0)),
         (bsonnumpy.error, r'dtype must include field names',
          (self.bson_docs, None, 10)),
         (TypeError, needs_iterator, (self.dtype, self.dtype, 10)),
         (TypeError, r'function takes exactly 3 arguments \(4 given\)',
          (self.dtype, self.bson_docs, 10, 10)),
     )
     for exc_type, pattern, call_args in cases:
         with self.assertRaisesPattern(exc_type, pattern):
             bsonnumpy.sequence_to_ndarray(*call_args)
Ejemplo n.º 18
0
    def findAll(self, start, end):
        """Return an ndarray of documents with ``start <= date < end``.

        Documents are read from ``self.colecao`` as raw batches (limit 100,
        batch size 100) and converted with the module-level ``dtype``.

        NOTE(review): the cursor is limited to 100 documents while the
        count passed to ``sequence_to_ndarray`` is the total number of
        matches; confirm how a count larger than the number of supplied
        documents is handled.
        """
        # Named `query` so the builtin `filter` is not shadowed. No
        # `global` statement is needed because `dtype` is only read.
        query = {"date": {"$gte": start, "$lt": end}}
        batches = (
            self.colecao.find_raw_batches(query).limit(100).batch_size(100))

        return bsonnumpy.sequence_to_ndarray(
            batches, dtype, self.colecao.count(query))
Ejemplo n.º 19
0
    def make_mixed_collection_test(self, docs, dtype):
        # Load `docs` through self.get_cursor_sequence, convert the raw
        # batches to an ndarray, and compare it against a regular find() on
        # the bsonnumpy_test.coll collection.
        collection = self.get_cursor_sequence(docs)

        raw_batches = collection.find_raw_batches()
        loaded = bsonnumpy.sequence_to_ndarray(raw_batches, dtype,
                                               collection.count())

        expected_dtype = np.dtype(dtype)
        reference_cursor = self.client.bsonnumpy_test.coll.find()
        self.compare_results(expected_dtype, reference_cursor, loaded)
Ejemplo n.º 20
0
    def test_datetime(self):
        # Datetimes loaded into an int64 field must equal the value of
        # millis(datetime - 1970-01-01) for each inserted document.
        epoch = datetime.datetime(1970, 1, 1)
        docs = [{
            "x": datetime.datetime(1970, 1, 1)
        }, {
            "x": datetime.datetime(1980, 1, 1)
        }, {
            "x": datetime.datetime(1990, 1, 1)
        }]
        dtype = np.dtype([('x', np.int64)])

        # Start from an empty collection so only `docs` are present.
        self.client.bsonnumpy_test.coll.delete_many({})
        self.client.bsonnumpy_test.coll.insert_many(docs)
        raw_coll = self.client.get_database(
            'bsonnumpy_test',
            codec_options=CodecOptions(document_class=RawBSONDocument)).coll

        cursor = raw_coll.find()
        ndarray = bsonnumpy.sequence_to_ndarray((doc.raw for doc in cursor),
                                                dtype, raw_coll.count())

        # Guard against a vacuous pass: without this, an empty or truncated
        # result would skip some or all of the comparisons below.
        self.assertEqual(len(docs), len(ndarray))

        for document, row in zip(docs, ndarray):
            self.assertEqual(millis(document["x"] - epoch), row["x"])
Ejemplo n.º 21
0
    def test_deeply_nested_array(self):
        # arrays of length 1 are maintained when they are within another array
        def check(fields, values):
            # `fields` is a list of (name, dtype string) pairs and `values`
            # the matching nested lists, in the same order. The document is
            # encoded, loaded, and compared against the same values built
            # directly with numpy.
            dtype = np.dtype(fields)
            doc = bson.SON(
                [(name, value) for (name, _), value in zip(fields, values)])
            encoded = bson._dict_to_bson(doc, False,
                                         bson.DEFAULT_CODEC_OPTIONS)
            result = bsonnumpy.sequence_to_ndarray([encoded], dtype, 1)
            self.assertEqual(dtype, result.dtype)
            expected = np.array([tuple(values)], dtype)
            self.assertTrue(np.array_equal(result, expected))

        check([("a", "(3,2,1)int32"), ("b", "(3,2,1)int32")],
              [[[[9], [9]], [[8], [8]], [[7], [7]]],
               [[[6], [6]], [[5], [5]], [[4], [4]]]])

        check([("a", "(3,1)int32"), ("b", "(3,1)int32"),
               ("c", "(3,1)int32")],
              [[[9], [8], [7]], [[6], [5], [4]], [[3], [2], [1]]])

        check([("a", "2int32")], [[7, 7]])

        check([("a", "(2,1,1,1)int32")], [[[[[99]]], [[[88]]]]])
Ejemplo n.º 22
0
    def test_empty(self):
        # A raw batch containing an empty document between two valid ones
        # must be rejected with "document does not match dtype".
        # np.float64 replaces the `np.float` alias, which was removed from
        # NumPy and was only ever an alias for the builtin float.
        dtype = np.dtype([('x', np.int32), ('y', np.float64)])
        batch = b''.join([
            bson.BSON.encode({
                "x": 1,
                "y": 1.1
            }),
            bson.BSON.encode({}),
            bson.BSON.encode({
                "x": 3,
                "y": 1.3
            }),
        ])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'document does not match dtype'):
            bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
Ejemplo n.º 23
0
 def compare_seq_to_ndarray_result(self, np_type, document):
     # Encode `document`, load it as a one-row ndarray of dtype `np_type`,
     # then check the resulting dtype and every field value.
     encoded = bson._dict_to_bson(document, False,
                                  bson.DEFAULT_CODEC_OPTIONS)
     expected_dtype = np.dtype(np_type)
     loaded = bsonnumpy.sequence_to_ndarray([encoded], expected_dtype, 1)
     self.assertEqual(loaded.dtype, expected_dtype)
     for key in document:
         actual = loaded[0][key]
         wanted = document[key]
         failure_note = "Comparison failed for type %s: %s != %s" % (
             expected_dtype, actual, wanted)
         self.assertEqual(actual, wanted, failure_note)
Ejemplo n.º 24
0
    def findDiskUsage(self, start, end):
        """Return an ndarray of ``disk_usage`` for ``start <= date < end``.

        Documents are read from ``self.colecao`` as raw batches (limit 100,
        batch size 100), projected to ``disk_usage``, and converted with
        the module-level ``dtype``.

        NOTE(review): the cursor is limited to 100 documents while the
        count passed to ``sequence_to_ndarray`` is the total number of
        matches; confirm how a count larger than the number of supplied
        documents is handled.
        """
        # Named `query` so the builtin `filter` is not shadowed. No
        # `global` statement is needed because `dtype` is only read.
        query = {"date": {"$gte": start, "$lt": end}}
        projection = {"disk_usage": True}
        batches = self.colecao.find_raw_batches(
            query, projection).limit(100).batch_size(100)

        return bsonnumpy.sequence_to_ndarray(
            batches, dtype, self.colecao.count(query))
Ejemplo n.º 25
0
    def test_array_scalar_load3(self):
        # Test sub arrays with documents that have arrays. The call is
        # expected to fail with "unsupported BSON type: Sub-document".
        def quad_doc(value):
            # Sub-document whose 'a' and 'b' are four copies of `value`.
            return bson.SON([('a', [value] * 4), ('b', [value] * 4)])

        raw_docs = []
        for i in range(10):
            son_doc = bson.SON([('x', [quad_doc(i), quad_doc(-i)])])
            raw_docs.append(bson._dict_to_bson(
                son_doc, False, bson.DEFAULT_CODEC_OPTIONS))

        # 'x' is a length-2 sub-array of records with two 4-int32 fields.
        record_fields = [('a', '4int32'), ('b', '4int32')]
        dtype = np.dtype([('x', np.dtype((record_fields, 2)))])

        with self.assertRaisesPattern(bsonnumpy.error,
                                      r'unsupported BSON type: Sub-document'):
            bsonnumpy.sequence_to_ndarray(raw_docs, dtype, 4)
Ejemplo n.º 26
0
    def test_string_length(self):
        # The string "abc" loaded into void fields of width 1 to 4: the
        # expected bytes are the string cut to the field width, with a zero
        # byte filling the width-4 field.
        encoded = bson._dict_to_bson({"x": "abc"}, True,
                                     bson.DEFAULT_CODEC_OPTIONS)

        expectations = (
            ("V1", b"a"),
            ("V2", b"ab"),
            ("V3", b"abc"),
            ("V4", b"abc\0"),
        )
        for void_code, expected_bytes in expectations:
            loaded = bsonnumpy.sequence_to_ndarray(
                iter([encoded]), np.dtype([("x", void_code)]), 1)
            self.assertEqual(loaded[0]["x"].tobytes(), expected_bytes)
    def test_aggregate_raw_batches(self):
        # Aggregate {"x": i} documents through a $project stage computing
        # y = 2 * x and check the ndarray built from the raw batches.
        result_dtype = np.dtype([('y', np.int32)])
        source_docs = [{"x": n} for n in range(10)]
        doubled = [(2 * n, ) for n in range(10)]

        collection = self.get_cursor_sequence(source_docs)
        doubling_stage = {'$project': {'y': {'$multiply': [2, '$x']}}}
        raw_batches = collection.aggregate_raw_batches([doubling_stage])
        loaded = bsonnumpy.sequence_to_ndarray(
            raw_batches, result_dtype, collection.count())

        self.assertEqual(result_dtype, loaded.dtype)
        np.testing.assert_array_equal(loaded, np.array(doubled, result_dtype))
    def test_objectid(self):
        # ObjectIds loaded into a 12-byte void field must keep their exact
        # binary representation.
        docs = [{"x": bson.ObjectId()} for _ in range(10)]
        dtype = np.dtype([('x', '<V12')])

        # Start from an empty collection so only `docs` are present.
        self.client.bsonnumpy_test.coll.delete_many({})
        self.client.bsonnumpy_test.coll.insert_many(docs)
        cursor = self.client.bsonnumpy_test.coll.find_raw_batches()
        ndarray = bsonnumpy.sequence_to_ndarray(cursor, dtype, cursor.count())

        # Guard against a vacuous pass: without this, an empty or truncated
        # result would skip some or all of the comparisons below.
        self.assertEqual(len(docs), len(ndarray))

        for document, row in zip(docs, ndarray):
            self.assertEqual(document["x"].binary, row["x"].tobytes())
Ejemplo n.º 29
0
    def test_incorrect_sub_dtype2(self):
        # Top document has extra key: the dtype names only 'y', so 'x' is
        # absent from the result.
        data = bson._dict_to_bson({
            "x": 12,
            "y": 13
        }, True, bson.DEFAULT_CODEC_OPTIONS)

        # The builtin `int` replaces the `np.int` alias, which was removed
        # from NumPy and was only ever an alias for the builtin.
        ndarray = bsonnumpy.sequence_to_ndarray([data],
                                                np.dtype([("y", int)]), 1)

        self.assertEqual(1, len(ndarray))
        self.assertEqual(13, ndarray[0]["y"])

        # 'x' is not a field of the dtype, so indexing it must fail.
        with self.assertRaises(ValueError):
            ndarray[0]["x"]
Ejemplo n.º 30
0
    def test_raw_batch(self):
        # Several BSON documents concatenated into one bytes batch must be
        # split into individual rows.
        # np.float64 replaces the `np.float` alias, which was removed from
        # NumPy and was only ever an alias for the builtin float.
        dtype = np.dtype([('x', np.int32), ('y', np.float64)])

        # A variety of lengths.
        batch = b''.join([
            bson.BSON.encode({
                "x": 1,
                "y": 1.1
            }),
            bson.BSON.encode({
                "x": 2,
                "y": 1.2,
                "extra key": "foobar"
            }),
            bson.BSON.encode({
                "x": 3,
                "y": 1.3
            }),
        ])

        result = bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
        ndarray = np.array([(1, 1.1), (2, 1.2), (3, 1.3)], dtype)
        np.testing.assert_array_equal(result, ndarray)

        dtype = np.dtype([('x', np.int32), ('y', np.float64),
                          ('z', np.int32)])

        # A variety of orders.
        batch = b''.join([
            bson.BSON.encode(SON([("x", 1), ("y", 1.1), ("z", 4)])),
            bson.BSON.encode(SON([("x", 2), ("z", 5), ("y", 1.2)])),
            bson.BSON.encode(SON([("z", 6), ("x", 3), ("y", 1.3)]))
        ])

        result = bsonnumpy.sequence_to_ndarray([batch], dtype, 3)
        ndarray = np.array([(1, 1.1, 4), (2, 1.2, 5), (3, 1.3, 6)], dtype)
        np.testing.assert_array_equal(result, ndarray)