Ejemplo n.º 1
0
 def setUp(self):
     """Create two Marble fixtures and the query fragments built from them.

     ``self.albums`` and ``self.transaction`` are the Marble objects; the
     remaining attributes are lists of their attributes (or comparisons on
     those attributes) plus two integer limits.
     """
     albums = Marble(name="Albums",
                     fields=_FIELDS,
                     partition=_PARTITIONS)
     transaction = Marble(name="Transcation",
                          fields=_FIELDS_SELL,
                          partition=_PARTITIONS)
     self.albums = albums
     self.transaction = transaction

     # Conditions: a single one, two combined with ``&`` on the same Marble,
     # and one per Marble.
     self.single_where = [(albums.rating > 3)]
     self.multi_wheres = [(albums.rating > 3) & (albums.id == 1000)]
     self.cross_wheres = [albums.rating > 3, transaction.id == 1000]

     # Column selections: one column, several from one Marble, and a mix
     # taken from both Marbles.
     self.single_select = [albums.name]
     self.multi_select = [albums.name, albums.date, albums.rating]
     self.cross_select = [
         albums.name,
         albums.artist,
         transaction.store_id,
         transaction.price,
     ]

     self.order_by = [albums.quantity, albums.rating]

     # Column pairs used for joins; ``join_invalid`` and ``join_invalid_2``
     # hold the same pair, ``join_invalid_1`` pairs a column with itself.
     self.join = [albums.id, transaction.item_id]
     self.join_invalid = [albums.id, transaction.price]
     self.join_invalid_1 = [albums.id, albums.id]
     self.join_invalid_2 = [albums.id, transaction.price]

     self.limit_single = 100
     self.limit_single_invalid = -100
Ejemplo n.º 2
0
class TestInsertPartitionFilter(unittest.TestCase):
    """Exercise ``Marble._insert`` with the ``partition_filter`` argument."""

    def _insert_filtered(self, partition_filter):
        """Insert every album from ``_ALBUMS`` using *partition_filter*.

        Stores the fixtures on ``self`` (``albums``, ``marble``,
        ``n_inserted``, ``files``) and registers removal of the produced
        files, so they are deleted even when a later assertion fails.
        """
        self.albums = [dict(zip(_FIELDS_RAW, album)) for album in _ALBUMS]
        self.marble = Marble(name="Collections",
                             fields=_FIELDS,
                             partition=_PARTITIONS)
        self.n_inserted, self.files = self.marble._insert(
            [(ujson.dumps(l) for l in self.albums)],
            partition_filter=partition_filter)
        self.addCleanup(self._unlink_all, self.files)

    @staticmethod
    def _unlink_all(files):
        """Delete every file path stored as a value of *files*."""
        for _, path in files.iteritems():
            os.unlink(path)

    def _albums_dated(self, dates):
        """Return how many fixture albums have their ``'date'`` in *dates*."""
        return sum(1 for album in self.albums if album['date'] in dates)

    def test_partition_numbers(self):
        """Filter given as one date string: one file, only matching albums."""
        self._insert_filtered('1992-10-03')
        self.assertEqual(len(self.files), 1)
        self.assertEqual(self._albums_dated(('1992-10-03',)), self.n_inserted)

    def test_partition_numbers_set(self):
        """Filter given as a list of two dates: two files, matching albums."""
        wanted = ['1992-10-03', '1986-01-03']
        self._insert_filtered(wanted)
        self.assertEqual(len(self.files), 2)
        self.assertEqual(self._albums_dated(tuple(wanted)), self.n_inserted)
Ejemplo n.º 3
0
class TestInsertPartitionFilter(unittest.TestCase):
    """Check ``Marble._insert`` when it is restricted by ``partition_filter``."""

    def _run_insert(self, partition_filter):
        """Build the fixtures and insert them with *partition_filter*.

        Sets ``self.albums``, ``self.marble``, ``self.n_inserted`` and
        ``self.files``. File removal is registered with ``addCleanup`` right
        after the insert so a failing assertion cannot leave files behind.
        """
        self.albums = [dict(zip(_FIELDS_RAW, album)) for album in _ALBUMS]
        self.marble = Marble(name="Collections",
                             fields=_FIELDS,
                             partition=_PARTITIONS)
        self.n_inserted, self.files = self.marble._insert(
            [(ujson.dumps(l) for l in self.albums)],
            partition_filter=partition_filter)
        self.addCleanup(self._remove_files)

    def _remove_files(self):
        """Unlink every file path recorded in ``self.files``."""
        for _, path in self.files.iteritems():
            os.unlink(path)

    def _expected_count(self, *dates):
        """Count the fixture albums whose ``'date'`` equals one of *dates*."""
        return sum(1 for album in self.albums if album['date'] in dates)

    def test_partition_numbers(self):
        """A single date string as filter produces exactly one file."""
        self._run_insert('1992-10-03')
        self.assertEqual(len(self.files), 1)
        self.assertEqual(self._expected_count('1992-10-03'), self.n_inserted)

    def test_partition_numbers_set(self):
        """A list of two dates as filter produces exactly two files."""
        self._run_insert(['1992-10-03', '1986-01-03'])
        self.assertEqual(len(self.files), 2)
        self.assertEqual(self._expected_count('1992-10-03', '1986-01-03'),
                         self.n_inserted)
Ejemplo n.º 4
0
 def setUp(self):
     """Build the album dicts, create the Marble and insert the albums.

     The result of ``_insert`` is unpacked into ``self.n_inserted`` and
     ``self.files``.
     """
     self.albums = [dict(zip(_FIELDS_RAW, row)) for row in _ALBUMS]
     self.marble = Marble(name="Collections",
                          fields=_FIELDS,
                          partition=_PARTITIONS)
     # One lazily evaluated stream of JSON-encoded albums, wrapped in a list.
     encoded = (ujson.dumps(l) for l in self.albums)
     inserted, files = self.marble._insert([encoded])
     self.n_inserted = inserted
     self.files = files
Ejemplo n.º 5
0
 def setUp(self):
     """Prepare the fixtures: album dicts, a Marble, and the inserted data."""
     records = []
     for raw in _ALBUMS:
         # Pair each raw value with its field name.
         records.append(dict(zip(_FIELDS_RAW, raw)))
     self.albums = records

     self.marble = Marble(name="Collections",
                          fields=_FIELDS,
                          partition=_PARTITIONS)

     # ``_insert`` receives a list holding one generator of JSON strings.
     json_rows = (ujson.dumps(l) for l in self.albums)
     self.n_inserted, self.files = self.marble._insert([json_rows])
Ejemplo n.º 6
0
class TestMarble(unittest.TestCase):
    """Insert the album fixtures into a Marble and verify what can be read back.

    Row ids used by these tests start at 1 and follow the order of
    ``self.albums`` within each partition.
    """

    def setUp(self):
        """Build the album dicts, create the Marble and insert the albums."""
        self.albums = [dict(zip(_FIELDS_RAW, album)) for album in _ALBUMS]
        self.marble = Marble(name="Collections",
                             fields=_FIELDS,
                             partition=_PARTITIONS)
        self.n_inserted, self.files = self.marble._insert(
            [(ujson.dumps(l) for l in self.albums)])

    def tearDown(self):
        """Delete every file produced by the insert in ``setUp``."""
        for _, path in self.files.iteritems():
            os.unlink(path)

    # -- helpers ---------------------------------------------------------

    def _partition_rows(self, date):
        """Return the fixture albums whose partition value equals *date*."""
        return [album for album in self.albums if album[_PARTITIONS] == date]

    def _assert_rows_within(self, bitset, allowed_ids, label=""):
        """Assert that every row id yielded by *bitset* is in *allowed_ids*.

        NOTE(review): this is a subset check only, exactly like the inline
        loops it replaces; an empty *bitset* always passes.
        """
        allowed = BitSet()
        for row_id in allowed_ids:
            allowed.set(row_id)
        for row_id in bitset:
            self.assertTrue(row_id in allowed,
                            "%s: unexpected row id %r" % (label, row_id))

    def _check_name_index(self, stream, date):
        """Check ``bit_eq("name", ...)`` for each album of partition *date*."""
        for rowid, album in enumerate(self._partition_rows(date), 1):
            bitset = stream.bit_eq("name", album["name"])
            self._assert_rows_within(bitset, (rowid,), "name")

    def _check_partition(self, date, txn, dbs, meta):
        """Verify the meta db and every column db of one opened partition."""
        #  check meta db
        for key in ("_vid_nodes", "_vid_kids", "_vid16_nodes", "_vid16_kids"):
            self.assertTrue(meta.contains(txn, key))
        self.assertEqual(meta.get(txn, "name"), ujson.dumps("Collections"))
        self.assertEqual(meta.get(txn, "partition"), ujson.dumps("date"))
        self.assertEqual(meta.get(txn, "fields"), ujson.dumps(_FIELDS))
        vid_nodes, _ = meta.get_raw(txn, '_vid_nodes')
        vid_kids, _ = meta.get_raw(txn, '_vid_kids')
        vid16_nodes, _ = meta.get_raw(txn, '_vid16_nodes', (None, 0))
        vid16_kids, _ = meta.get_raw(txn, '_vid16_kids', (None, 0))

        #  check subdb, subinddb
        rows = self._partition_rows(date)
        for name, (db, ind_db, _, column) in dbs.iteritems():
            bitmaps = {}  # index bitmaps already loaded for this column
            for row_id, album in enumerate(rows, 1):
                value = album[name]
                # Compute the value the column db is expected to hold.
                if column.is_trie:
                    if column.rtrie_indicator == mdb.MDB_UINT_16:
                        val = rtrie.vid_for_value(vid16_nodes, vid16_kids, value)
                    else:
                        val = rtrie.vid_for_value(vid_nodes, vid_kids, value)
                elif column.is_lz4:
                    val = clz4.compress(value)
                else:
                    val = value
                self.assertEqual(db.get(txn, row_id), val)
                if ind_db is not None:
                    #  row_id should be in bitmap too
                    if val not in bitmaps:
                        bitmap = BitSet()
                        bitmap.loads(ind_db.get(txn, val))
                        bitmaps[val] = bitmap
                    self.assertTrue(row_id in bitmaps[val])

    # -- tests -----------------------------------------------------------

    def test_field_names(self):
        """The Marble exposes the same field names as ``_FIELDS_RAW``."""
        self.assertListEqual(sorted(_FIELDS_RAW),
                             sorted(self.marble._field_names))

    def test_marble_insert(self):
        """Every partition file contains the expected meta data and rows."""
        #  test general information
        self.assertEqual(self.n_inserted, len(_ALBUMS))
        self.assertEqual(_NPARTITIONS, len(self.files))
        #  test that each sub db is fine
        for date, path in self.files.iteritems():
            env, txn, dbs, meta = self.marble._open(path)
            try:
                self._check_partition(date, txn, dbs, meta)
            finally:
                # Finish the transaction and close the environment even when
                # an assertion above failed.
                txn.commit()
                env.close()

    def test_marble_stream_get(self):
        """``MarbleStream.get`` returns each stored field of each row."""
        for date, path in self.files.iteritems():
            stream = MarbleStream(path)
            try:
                rows = self._partition_rows(date)
                for rowid, album in enumerate(rows, 1):
                    # test 'get' first
                    for k, v in album.iteritems():
                        self.assertEqual(v, stream.get(k, rowid))
            finally:
                stream.close()

    def test_marble_stream_bit_ops(self):
        """The ``bit_*`` operations only return row ids from the expected sets."""
        stream = MarbleStream(self.files["1992-10-03"])
        try:
            # test "name" index
            self._check_name_index(stream, "1992-10-03")
            # test "genre" index
            self._assert_rows_within(stream.bit_eq("genre", "R&R"),
                                     range(1, 5), "genre")
        finally:
            stream.close()

        # (method name, operand, row ids the result may contain), executed in
        # this order against the "rating" column of partition "1986-01-03".
        rating_cases = [
            # eq and not-eq
            ("bit_eq", 4, (4,)),
            ("bit_eq", 3, (6,)),
            ("bit_eq", 5, (1, 2, 3, 5)),
            ("bit_ne", 5, (4, 6)),
            ("bit_ne", 3, (1, 2, 3, 4, 5)),
            ("bit_ne", 4, (1, 2, 3, 5, 6)),
            # ordering comparisons: ge, gt, le, lt
            ("bit_ge", 3, (1, 2, 3, 4, 5, 6)),
            ("bit_gt", 3, (1, 2, 3, 4, 5)),
            ("bit_le", 3, (6,)),
            ("bit_lt", 3, ()),
            ("bit_lt", 5, (4, 6)),
            ("bit_le", 5, (1, 2, 3, 4, 5, 6)),
            ("bit_gt", 5, ()),
            ("bit_ge", 5, (1, 2, 3, 5)),
            ("bit_le", 4, (4, 6)),
            ("bit_lt", 4, (6,)),
            ("bit_ge", 4, (1, 2, 3, 4, 5)),
            ("bit_gt", 4, (1, 2, 3, 5)),
        ]

        stream = MarbleStream(self.files["1986-01-03"])
        try:
            # test "name" index
            self._check_name_index(stream, "1986-01-03")
            # test "genre" index
            self._assert_rows_within(stream.bit_eq("genre", "SoundTrack"),
                                     range(1, 7), "genre")
            # test "rating" index
            for op_name, operand, allowed_ids in rating_cases:
                bitset = getattr(stream, op_name)("rating", operand)
                self._assert_rows_within(
                    bitset, allowed_ids,
                    "%s('rating', %r)" % (op_name, operand))
        finally:
            stream.close()
Ejemplo n.º 7
0
class TestMarble(unittest.TestCase):
    """Insert the album fixtures into a Marble and verify what can be read back.

    Row ids used by these tests start at 1 and follow the order of
    ``self.albums`` within each partition.
    """

    def setUp(self):
        """Build the album dicts, create the Marble and insert the albums."""
        self.albums = [dict(zip(_FIELDS_RAW, album)) for album in _ALBUMS]
        self.marble = Marble(name="Collections",
                             fields=_FIELDS,
                             partition=_PARTITIONS)
        self.n_inserted, self.files = self.marble._insert([
            (ujson.dumps(l) for l in self.albums)
        ])

    def tearDown(self):
        """Delete every file produced by the insert in ``setUp``."""
        for _, path in self.files.iteritems():
            os.unlink(path)

    # -- helpers ---------------------------------------------------------

    def _partition_rows(self, date):
        """Return the fixture albums whose partition value equals *date*."""
        return [album for album in self.albums if album[_PARTITIONS] == date]

    def _assert_rows_within(self, bitset, allowed_ids, label=""):
        """Assert that every row id yielded by *bitset* is in *allowed_ids*.

        NOTE(review): this is a subset check only, exactly like the inline
        loops it replaces; an empty *bitset* always passes.
        """
        allowed = BitSet()
        for row_id in allowed_ids:
            allowed.set(row_id)
        for row_id in bitset:
            self.assertTrue(row_id in allowed,
                            "%s: unexpected row id %r" % (label, row_id))

    def _check_name_index(self, stream, date):
        """Check ``bit_eq("name", ...)`` for each album of partition *date*."""
        for rowid, album in enumerate(self._partition_rows(date), 1):
            bitset = stream.bit_eq("name", album["name"])
            self._assert_rows_within(bitset, (rowid,), "name")

    def _check_partition(self, date, txn, dbs, meta):
        """Verify the meta db and the column indexes of one opened partition."""
        #  check meta db
        for key in ("_vid_nodes", "_vid_kids", "_vid16_nodes", "_vid16_kids"):
            self.assertTrue(meta.contains(txn, key))
        self.assertEqual(meta.get(txn, "name"), ujson.dumps("Collections"))
        self.assertEqual(meta.get(txn, "partition"), ujson.dumps("date"))
        self.assertEqual(meta.get(txn, "fields"), ujson.dumps(_FIELDS))
        self.assertEqual(meta.get(txn, "_pdata"), ujson.dumps(date))
        vid_nodes, _ = meta.get_raw(txn, '_vid_nodes')
        vid_kids, _ = meta.get_raw(txn, '_vid_kids')
        vid16_nodes, _ = meta.get_raw(txn, '_vid16_nodes', (None, 0))
        vid16_kids, _ = meta.get_raw(txn, '_vid16_kids', (None, 0))

        #  check subdb, subinddb
        rows = self._partition_rows(date)
        for name, (db, ind_db, _, column, _) in dbs.iteritems():
            if name == "_count":
                # This entry is skipped; it is not looked up in the albums.
                continue
            bitmaps = {}  # index bitmaps already loaded for this column
            for row_id, album in enumerate(rows, 1):
                value = album[name]
                # Compute the key under which the index stores this value.
                if column.is_trie:
                    if column.rtrie_indicator == mdb.MDB_UINT_16:
                        val = rtrie.vid_for_value(
                            vid16_nodes, vid16_kids, value)
                    else:
                        val = rtrie.vid_for_value(
                            vid_nodes, vid_kids, value)
                elif column.is_lz4:
                    val = clz4.compress(value)
                else:
                    val = value
                # NOTE(review): the direct column check below was already
                # disabled in the original; the reason is not visible here.
                # self.assertEqual(db.get(txn, row_id), val)
                if ind_db is not None:
                    #  row_id should be in bitmap too
                    if val not in bitmaps:
                        bitmap = BitSet()
                        bitmap.loads(ind_db.get(txn, val))
                        bitmaps[val] = bitmap
                    self.assertTrue(row_id in bitmaps[val])

    # -- tests -----------------------------------------------------------

    def test_field_names(self):
        """The Marble exposes the same field names as ``_FIELDS_RAW``."""
        self.assertListEqual(sorted(_FIELDS_RAW),
                             sorted(self.marble._field_names))

    def test_marble_insert(self):
        """Every partition file contains the expected meta data and indexes."""
        #  test general information
        self.assertEqual(self.n_inserted, len(_ALBUMS))
        self.assertEqual(_NPARTITIONS, len(self.files))
        #  test that each sub db is fine
        for date, path in self.files.iteritems():
            env, txn, dbs, meta = self.marble._open(path)
            try:
                self._check_partition(date, txn, dbs, meta)
            finally:
                # Finish the transaction and close the environment even when
                # an assertion above failed.
                txn.commit()
                env.close()

    def test_marble_stream_get(self):
        """``MarbleStream.get`` returns each stored field of each row."""
        for date, path in self.files.iteritems():
            stream = MarbleStream(path)
            try:
                rows = self._partition_rows(date)
                for rowid, album in enumerate(rows, 1):
                    # test 'get' first
                    for k, v in album.iteritems():
                        self.assertEqual(v, stream.get(k, rowid))
            finally:
                stream.close()

    def test_marble_stream_bit_ops(self):
        """The ``bit_*`` operations only return row ids from the expected sets."""
        stream = MarbleStream(self.files["1992-10-03"])
        try:
            # test "name" index
            self._check_name_index(stream, "1992-10-03")
            # test "genre" index
            self._assert_rows_within(stream.bit_eq("genre", "R&R"),
                                     range(1, 5), "genre")
        finally:
            stream.close()

        # (method name, operand, row ids the result may contain), executed in
        # this order against the "rating" column of partition "1986-01-03".
        rating_cases = [
            # eq and not-eq
            ("bit_eq", 4, (4,)),
            ("bit_eq", 3, (6,)),
            ("bit_eq", 5, (1, 2, 3, 5)),
            ("bit_ne", 5, (4, 6)),
            ("bit_ne", 3, (1, 2, 3, 4, 5)),
            ("bit_ne", 4, (1, 2, 3, 5, 6)),
            # eq_ex and ne_ex take a list of values
            ("bit_eq_ex", [3, 4], (4, 6)),
            ("bit_eq_ex", [5], (1, 2, 3, 5)),
            ("bit_ne_ex", [5], (4, 6)),
            ("bit_ne_ex", [3, 4], (1, 2, 3, 5)),
            # ordering comparisons: ge, gt, le, lt
            ("bit_ge", 3, (1, 2, 3, 4, 5, 6)),
            ("bit_gt", 3, (1, 2, 3, 4, 5)),
            ("bit_le", 3, (6,)),
            ("bit_lt", 3, ()),
            ("bit_lt", 5, (4, 6)),
            ("bit_le", 5, (1, 2, 3, 4, 5, 6)),
            ("bit_gt", 5, ()),
            ("bit_ge", 5, (1, 2, 3, 5)),
            ("bit_le", 4, (4, 6)),
            ("bit_lt", 4, (6,)),
            ("bit_ge", 4, (1, 2, 3, 4, 5)),
            ("bit_gt", 4, (1, 2, 3, 5)),
        ]

        stream = MarbleStream(self.files["1986-01-03"])
        try:
            # test "name" index
            self._check_name_index(stream, "1986-01-03")
            # test "genre" index
            self._assert_rows_within(stream.bit_eq("genre", "SoundTrack"),
                                     range(1, 7), "genre")
            # test "rating" index
            for op_name, operand, allowed_ids in rating_cases:
                bitset = getattr(stream, op_name)("rating", operand)
                self._assert_rows_within(
                    bitset, allowed_ids,
                    "%s('rating', %r)" % (op_name, operand))
        finally:
            stream.close()
Ejemplo n.º 8
0
 def setUp(self):
     """Create the "employee" and "department" Marble fixtures."""
     employee = Marble(name="employee", fields=EMP_FIELDS)
     department = Marble(name="department", fields=DEPT_FIELDS)
     self.emp, self.dept = employee, department