Example #1
0
class ClickHouseBaseBenchmark(AsyncBenchmark):
    """Benchmark base that stores produced data in the ``TestDB`` database.

    The constructor opens the database and creates the table for the model
    it is given; the helper methods insert data and verify the stored row
    count.
    """

    def __init__(self, model):
        """Open ``TestDB`` and create the table for *model*.

        Args:
            model: model class handed to ``Database.create_table``.
        """
        # NOTE(review): ``super(AsyncBenchmark, self)`` starts the MRO lookup
        # *after* AsyncBenchmark, so AsyncBenchmark.__init__ itself is not
        # called. Left unchanged because AsyncBenchmark is not visible from
        # here -- confirm whether ``super().__init__()`` was intended.
        super(AsyncBenchmark, self).__init__()
        self.db = Database('TestDB')
        self.db.create_table(model)

    def _insert_data(self, data):
        """Insert *data* through ``self.db.insert``."""
        self.db.insert(data)

    def _validate_data(self, expected, table):
        """Compare the row count of *table* with *expected* and log the result.

        Args:
            expected: number of rows that were produced.
            table: table name; it is interpolated verbatim into the SQL
                statement, so it must be a trusted identifier.

        Returns:
            A tuple ``(expected, value, elapsed)`` where ``value`` is the
            stored row count and ``elapsed`` is the time in seconds spent
            counting and logging.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments while the query runs.
        started = time.perf_counter()
        value = int(self.db.raw('SELECT count(*) FROM {}'.format(table)))
        if expected == value:
            logger.info('The stored data is equal to the produced quantity.')
        else:
            logger.warning(
                'The stored data is different to the produced quantity (expected {} != {}).'
                .format(expected, value))
        return (expected, value, time.perf_counter() - started)
class MigrationsTestCase(unittest.TestCase):
    """Applies the sample migrations in order and checks the resulting tables."""

    def setUp(self):
        """Connect to ``test-db`` with statement logging and drop the migration history table."""
        self.database = Database('test-db', log_statements=True)
        self.database.drop_table(MigrationHistory)

    def tearDown(self):
        """Drop the whole test database."""
        self.database.drop_database()

    def table_exists(self, model_class):
        """Return True when ``EXISTS TABLE`` reports 1 for the model's table."""
        table = model_class.table_name()
        first_row = next(self.database.select("EXISTS TABLE $db.`%s`" % table))
        return first_row.result == 1

    def get_table_fields(self, model_class):
        """Return the ``(name, type)`` pairs yielded by ``DESC`` for the model's table."""
        names = (self.database.db_name, model_class.table_name())
        rows = self.database.select("DESC `%s`.`%s`" % names)
        return [(row.name, row.type) for row in rows]

    def get_table_def(self, model_class):
        """Return the raw ``SHOW CREATE TABLE`` output for the model's table."""
        statement = 'SHOW CREATE TABLE $db.`%s`' % model_class.table_name()
        return self.database.raw(statement)

    def _migrate_to(self, number):
        """Run the sample migrations up to migration *number*."""
        self.database.migrate('tests.sample_migrations', number)

    def _assert_fields(self, model_class, expected):
        """Assert that the model's table has exactly the *expected* fields."""
        self.assertEqual(self.get_table_fields(model_class), expected)

    def _model3_f1_values(self):
        """Return the ``f1`` values stored for Model3, ordered by ``f1``."""
        rows = self.database.select('SELECT f1 FROM $table ORDER BY f1',
                                    model_class=Model3)
        return [row.f1 for row in rows]

    def _assert_insert_rejected(self, f1, f2):
        """Assert that inserting a ModelWithConstraints row raises ServerError."""
        with self.assertRaises(ServerError):
            self.database.insert([ModelWithConstraints(f1=f1, f2=f2)])

    def test_migrations(self):
        """Walk through migrations 1-19, verifying the schema after each step."""
        # Creation and deletion of table
        self._migrate_to(1)
        self.assertTrue(self.table_exists(Model1))
        self._migrate_to(2)
        self.assertFalse(self.table_exists(Model1))
        self._migrate_to(3)
        self.assertTrue(self.table_exists(Model1))

        # Adding, removing and altering simple fields
        self._assert_fields(Model1, [
            ('date', 'Date'),
            ('f1', 'Int32'),
            ('f2', 'String'),
        ])
        self._migrate_to(4)
        self._assert_fields(Model2, [
            ('date', 'Date'),
            ('f1', 'Int32'),
            ('f3', 'Float32'),
            ('f2', 'String'),
            ('f4', 'String'),
            ('f5', 'Array(UInt64)'),
        ])
        self._migrate_to(5)
        self._assert_fields(Model3, [
            ('date', 'Date'),
            ('f1', 'Int64'),
            ('f3', 'Float64'),
            ('f4', 'String'),
        ])

        # Altering enum fields
        self._migrate_to(6)
        self.assertTrue(self.table_exists(EnumModel1))
        self._assert_fields(EnumModel1, [
            ('date', 'Date'),
            ('f1', "Enum8('dog' = 1, 'cat' = 2, 'cow' = 3)"),
        ])
        self._migrate_to(7)
        self.assertTrue(self.table_exists(EnumModel1))
        self._assert_fields(EnumModel2, [
            ('date', 'Date'),
            ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)"),
        ])

        # Materialized fields and alias fields
        self._migrate_to(8)
        self.assertTrue(self.table_exists(MaterializedModel))
        self._assert_fields(MaterializedModel, [
            ('date_time', "DateTime"),
            ('date', 'Date'),
        ])
        self._migrate_to(9)
        self.assertTrue(self.table_exists(AliasModel))
        self._assert_fields(AliasModel, [
            ('date', 'Date'),
            ('date_alias', "Date"),
        ])

        # Buffer models creation and alteration
        self._migrate_to(10)
        self.assertTrue(self.table_exists(Model4))
        self.assertTrue(self.table_exists(Model4Buffer))
        self._assert_fields(Model4, [
            ('date', 'Date'),
            ('f1', 'Int32'),
            ('f2', 'String'),
        ])
        self._assert_fields(Model4Buffer, [
            ('date', 'Date'),
            ('f1', 'Int32'),
            ('f2', 'String'),
        ])
        self._migrate_to(11)
        self._assert_fields(Model4, [
            ('date', 'Date'),
            ('f3', 'DateTime'),
            ('f2', 'String'),
        ])
        self._assert_fields(Model4Buffer, [
            ('date', 'Date'),
            ('f3', 'DateTime'),
            ('f2', 'String'),
        ])

        # Row counts and f1 values of Model3 after migrations 12 and 13
        self._migrate_to(12)
        self.assertEqual(self.database.count(Model3), 3)
        self.assertListEqual(self._model3_f1_values(), [1, 2, 3])

        self._migrate_to(13)
        self.assertEqual(self.database.count(Model3), 4)
        self.assertListEqual(self._model3_f1_values(), [1, 2, 3, 4])

        # Materialized and alias models with an extra computed integer column
        self._migrate_to(14)
        self.assertTrue(self.table_exists(MaterializedModel1))
        self._assert_fields(MaterializedModel1, [
            ('date_time', 'DateTime'),
            ('int_field', 'Int8'),
            ('date', 'Date'),
            ('int_field_plus_one', 'Int8'),
        ])
        self.assertTrue(self.table_exists(AliasModel1))
        self._assert_fields(AliasModel1, [
            ('date', 'Date'),
            ('int_field', 'Int8'),
            ('date_alias', 'Date'),
            ('int_field_plus_one', 'Int8'),
        ])

        # Codecs and low cardinality
        self._migrate_to(15)
        self.assertTrue(self.table_exists(Model4_compressed))
        if not self.database.has_low_cardinality_support:
            logging.warning('No support for low cardinality')
            self._assert_fields(Model2, [
                ('date', 'Date'),
                ('f1', 'Int32'),
                ('f3', 'Float32'),
                ('f2', 'String'),
                ('f4', 'Nullable(String)'),
                ('f5', 'Array(UInt64)'),
            ])
        else:
            self._assert_fields(Model2LowCardinality, [
                ('date', 'Date'),
                ('f1', 'LowCardinality(Int32)'),
                ('f3', 'LowCardinality(Float32)'),
                ('f2', 'LowCardinality(String)'),
                ('f4', 'LowCardinality(Nullable(String))'),
                ('f5', 'Array(LowCardinality(UInt64))'),
            ])

        if self.database.server_version >= (19, 14, 3, 3):
            # Creating constraints
            self._migrate_to(16)
            self.assertTrue(self.table_exists(ModelWithConstraints))
            self.database.insert([ModelWithConstraints(f1=101, f2='a')])
            for f1, f2 in ((99, 'a'), (101, 'x')):
                self._assert_insert_rejected(f1, f2)
            # Modifying constraints
            self._migrate_to(17)
            self.database.insert([ModelWithConstraints(f1=99, f2='a')])
            for f1, f2 in ((101, 'a'), (99, 'x')):
                self._assert_insert_rejected(f1, f2)

        if self.database.server_version >= (20, 1, 2, 4):
            # Creating indexes
            self._migrate_to(18)
            self.assertTrue(self.table_exists(ModelWithIndex))
            for fragment in ('INDEX index ', 'INDEX another_index '):
                self.assertIn(fragment, self.get_table_def(ModelWithIndex))
            # Modifying indexes
            self._migrate_to(19)
            self.assertNotIn('INDEX index ',
                             self.get_table_def(ModelWithIndex))
            for fragment in ('INDEX index2 ', 'INDEX another_index '):
                self.assertIn(fragment, self.get_table_def(ModelWithIndex))
class CompressedFieldsTestCase(unittest.TestCase):
    """Tests construction, conversion and stored codecs of ``CompressedModel``."""

    def setUp(self):
        """Connect to ``test-db`` with statement logging and create the model's table."""
        self.database = Database('test-db', log_statements=True)
        self.database.create_table(CompressedModel)

    def tearDown(self):
        """Drop the whole test database."""
        self.database.drop_database()

    def test_defaults(self):
        """A model built without arguments exposes the expected default values."""
        # Check that all fields have their explicit or implicit defaults
        instance = CompressedModel()
        self.database.insert([instance])
        self.assertEqual(instance.date_field, datetime.date(1970, 1, 1))
        self.assertEqual(instance.datetime_field,
                         datetime.datetime(1970, 1, 1, tzinfo=pytz.utc))
        self.assertEqual(instance.string_field, 'dozo')
        self.assertEqual(instance.int64_field, 42)
        self.assertEqual(instance.float_field, 0)
        self.assertIsNone(instance.nullable_field)
        self.assertEqual(instance.array_field, [])

    def test_assignment(self):
        """Keyword arguments given to the constructor are readable back unchanged."""
        # Check that all fields are assigned during construction
        kwargs = dict(uint64_field=217,
                      date_field=datetime.date(1973, 12, 6),
                      datetime_field=datetime.datetime(2000,
                                                       5,
                                                       24,
                                                       10,
                                                       22,
                                                       tzinfo=pytz.utc),
                      string_field='aloha',
                      int64_field=-50,
                      float_field=3.14,
                      nullable_field=-2.718281,
                      array_field=['123456789123456', '', 'a'])
        instance = CompressedModel(**kwargs)
        self.database.insert([instance])
        for name, value in kwargs.items():
            self.assertEqual(value, getattr(instance, name))

    def test_string_conversion(self):
        """String inputs are converted to the field's Python type."""
        # Check field conversion from string during construction
        instance = CompressedModel(date_field='1973-12-06',
                                   int64_field='100',
                                   float_field='7',
                                   nullable_field=None,
                                   array_field='[a,b,c]')
        self.assertEqual(instance.date_field, datetime.date(1973, 12, 6))
        self.assertEqual(instance.int64_field, 100)
        self.assertEqual(instance.float_field, 7)
        self.assertIsNone(instance.nullable_field)
        self.assertEqual(instance.array_field, ['a', 'b', 'c'])
        # Check field conversion from string during assignment
        instance.int64_field = '99'
        self.assertEqual(instance.int64_field, 99)

    def test_to_dict(self):
        """``to_dict`` honours ``include_readonly`` and ``field_names``."""
        instance = CompressedModel(date_field='1973-12-06',
                                   int64_field='100',
                                   float_field='7',
                                   array_field='[a,b,c]')
        # Default call: every field is present, including alias_field.
        self.assertDictEqual(
            instance.to_dict(), {
                "date_field":
                datetime.date(1973, 12, 6),
                "int64_field":
                100,
                "float_field":
                7.0,
                "datetime_field":
                datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
                "alias_field":
                NO_VALUE,
                'string_field':
                'dozo',
                'nullable_field':
                None,
                'uint64_field':
                0,
                'array_field': ['a', 'b', 'c']
            })
        # With include_readonly=False the alias_field entry is expected to be absent.
        self.assertDictEqual(
            instance.to_dict(include_readonly=False), {
                "date_field":
                datetime.date(1973, 12, 6),
                "int64_field":
                100,
                "float_field":
                7.0,
                "datetime_field":
                datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
                'string_field':
                'dozo',
                'nullable_field':
                None,
                'uint64_field':
                0,
                'array_field': ['a', 'b', 'c']
            })
        # field_names narrows the result; alias_field is still left out
        # because include_readonly is False.
        self.assertDictEqual(
            instance.to_dict(include_readonly=False,
                             field_names=('int64_field', 'alias_field',
                                          'datetime_field')),
            {
                "int64_field":
                100,
                "datetime_field":
                datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc)
            })

    def test_confirm_compression_codec(self):
        """The codecs reported by ``system.columns`` match the expected ones.

        Skipped when the server version is below 19.17.
        """
        if self.database.server_version < (19, 17):
            self.skipTest('ClickHouse version too old')
        instance = CompressedModel(date_field='1973-12-06',
                                   int64_field='100',
                                   float_field='7',
                                   array_field='[a,b,c]')
        self.database.insert([instance])
        r = self.database.raw(
            "select name, compression_codec from system.columns where table = '{}' and database='{}' FORMAT TabSeparatedWithNamesAndTypes"
            .format(instance.table_name(), self.database.db_name))
        lines = r.splitlines()
        # The requested format puts two header rows (column names, then
        # column types) ahead of the data, so the data starts at index 2.
        data = [tuple(parse_tsv(line)) for line in lines[2:]]
        self.assertListEqual(data,
                             [('uint64_field', 'CODEC(ZSTD(10))'),
                              ('datetime_field', 'CODEC(Delta(4), ZSTD(1))'),
                              ('date_field', 'CODEC(Delta(4), ZSTD(22))'),
                              ('int64_field', 'CODEC(LZ4)'),
                              ('string_field', 'CODEC(LZ4HC(10))'),
                              ('nullable_field', 'CODEC(ZSTD(1))'),
                              ('array_field', 'CODEC(Delta(2), LZ4HC(0))'),
                              ('float_field', 'CODEC(NONE)'),
                              ('alias_field', 'CODEC(ZSTD(4))')])
class DatabaseTestCase(unittest.TestCase):
    """Tests insert, count, select, pagination, raw queries and read-only handling.

    Each test runs against freshly created ``Person`` and ``PersonBuffer``
    tables in the ``test-db`` database.
    """

    def setUp(self):
        """Connect to ``test-db`` and create the Person and PersonBuffer tables."""
        self.database = Database('test-db')
        self.database.create_table(Person)
        self.database.create_table(PersonBuffer)

    def tearDown(self):
        """Drop both tables and then the whole database."""
        self.database.drop_table(PersonBuffer)
        self.database.drop_table(Person)
        self.database.drop_database()

    def _insert_and_check(self, data, count):
        """Insert *data* and assert that Person then holds *count* rows.

        Also asserts that every instance yielded by iterating *data* after
        the insert reports ``self.database`` as its database.
        """
        self.database.insert(data)
        self.assertEqual(count, self.database.count(Person))
        for instance in data:
            self.assertEqual(self.database, instance.get_database())

    def _insert_and_check_buffer(self, data, count):
        """Insert *data* and assert that PersonBuffer then holds *count* rows."""
        self.database.insert(data)
        self.assertEqual(count, self.database.count(PersonBuffer))

    def test_insert__generator(self):
        """Inserting from a generator stores every sample row."""
        self._insert_and_check(self._sample_data(), len(data))

    def test_insert__list(self):
        """Inserting from a list stores every sample row."""
        self._insert_and_check(list(self._sample_data()), len(data))

    def test_insert__iterator(self):
        """Inserting from an explicit iterator stores every sample row."""
        self._insert_and_check(iter(self._sample_data()), len(data))

    def test_insert__empty(self):
        """Inserting an empty list leaves the table empty."""
        self._insert_and_check([], 0)

    def test_count(self):
        """``count`` works with and without a condition string."""
        self.database.insert(self._sample_data())
        self.assertEqual(self.database.count(Person), 100)
        self.assertEqual(self.database.count(Person, "first_name = 'Courtney'"), 2)
        self.assertEqual(self.database.count(Person, "birthday > '2000-01-01'"), 22)
        self.assertEqual(self.database.count(Person, "birthday < '1970-03-01'"), 0)

    def test_select(self):
        """Selecting with a model class returns populated, database-bound instances."""
        self._insert_and_check(self._sample_data(), len(data))
        query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name"
        results = list(self.database.select(query, Person))
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].last_name, 'Durham')
        self.assertEqual(results[0].height, 1.72)
        self.assertEqual(results[1].last_name, 'Scott')
        self.assertEqual(results[1].height, 1.70)
        self.assertEqual(results[0].get_database(), self.database)
        self.assertEqual(results[1].get_database(), self.database)

    def test_select_partial_fields(self):
        """Fields missing from the SELECT list keep their default value."""
        self._insert_and_check(self._sample_data(), len(data))
        query = "SELECT first_name, last_name FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name"
        results = list(self.database.select(query, Person))
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].last_name, 'Durham')
        self.assertEqual(results[0].height, 0)  # default value
        self.assertEqual(results[1].last_name, 'Scott')
        self.assertEqual(results[1].height, 0)  # default value
        self.assertEqual(results[0].get_database(), self.database)
        self.assertEqual(results[1].get_database(), self.database)

    def test_select_ad_hoc_model(self):
        """Selecting without a model class yields ``AdHocModel`` instances."""
        self._insert_and_check(self._sample_data(), len(data))
        query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name"
        results = list(self.database.select(query))
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0].__class__.__name__, 'AdHocModel')
        self.assertEqual(results[0].last_name, 'Durham')
        self.assertEqual(results[0].height, 1.72)
        self.assertEqual(results[1].last_name, 'Scott')
        self.assertEqual(results[1].height, 1.70)
        self.assertEqual(results[0].get_database(), self.database)
        self.assertEqual(results[1].get_database(), self.database)

    def test_pagination(self):
        """Walking all pages returns every inserted row, for several page sizes."""
        self._insert_and_check(self._sample_data(), len(data))
        # Try different page sizes
        for page_size in (1, 2, 7, 10, 30, 100, 150):
            # Iterate over pages and collect all instances
            page_num = 1
            instances = set()
            while True:
                page = self.database.paginate(Person, 'first_name, last_name', page_num, page_size)
                self.assertEqual(page.number_of_objects, len(data))
                self.assertGreater(page.pages_total, 0)
                instances.update(obj.to_tsv() for obj in page.objects)
                if page.pages_total == page_num:
                    break
                page_num += 1
            # Verify that all instances were returned
            self.assertEqual(len(instances), len(data))

    def test_pagination_last_page(self):
        """Page ``-1`` and page ``pages_total`` describe the same last page."""
        self._insert_and_check(self._sample_data(), len(data))
        # Try different page sizes
        for page_size in (1, 2, 7, 10, 30, 100, 150):
            # Ask for the last page in two different ways and verify equality
            page_a = self.database.paginate(Person, 'first_name, last_name', -1, page_size)
            page_b = self.database.paginate(Person, 'first_name, last_name', page_a.pages_total, page_size)
            self.assertEqual(page_a[1:], page_b[1:])
            self.assertEqual([obj.to_tsv() for obj in page_a.objects],
                             [obj.to_tsv() for obj in page_b.objects])

    def test_pagination_invalid_page(self):
        """Page numbers 0, -2 and -100 are rejected with ``ValueError``."""
        self._insert_and_check(self._sample_data(), len(data))
        for page_num in (0, -2, -100):
            with self.assertRaises(ValueError):
                self.database.paginate(Person, 'first_name, last_name', page_num, 100)

    def test_special_chars(self):
        """A string with quotes, escapes and non-ASCII text round-trips unchanged."""
        s = u'אבגד \\\'"`,.;éåäöšž\n\t\0\b\r'
        p = Person(first_name=s)
        self.database.insert([p])
        p = list(self.database.select("SELECT * from $table", Person))[0]
        self.assertEqual(p.first_name, s)

    def test_readonly(self):
        """Write operations through a read-only handle raise ``DatabaseException``."""
        orig_database = self.database
        self.database = Database(orig_database.db_name, readonly=True)
        try:
            with self.assertRaises(DatabaseException):
                self._insert_and_check(self._sample_data(), len(data))
            self.assertEqual(self.database.count(Person), 0)
            with self.assertRaises(DatabaseException):
                self.database.drop_table(Person)
            with self.assertRaises(DatabaseException):
                self.database.drop_database()
        finally:
            # Always restore the writable handle: tearDown drops tables through
            # self.database and would fail on the read-only one if an
            # assertion above did not hold.
            self.database = orig_database

    def test_insert_buffer(self):
        """Inserting PersonBuffer instances makes every row countable via PersonBuffer."""
        self._insert_and_check_buffer(self._sample_buffer_data(), len(data))

    def _sample_data(self):
        """Yield a Person built from each entry of the module-level ``data``."""
        for entry in data:
            yield Person(**entry)

    def test_raw(self):
        """``raw`` returns the tab-separated text of the query result."""
        self._insert_and_check(self._sample_data(), len(data))
        query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name"
        results = self.database.raw(query)
        self.assertEqual(results, "Whitney\tDurham\t1977-09-15\t1.72\nWhitney\tScott\t1971-07-04\t1.7\n")

    def test_insert_readonly(self):
        """Inserting a ReadOnlyModel instance raises ``DatabaseException``."""
        m = ReadOnlyModel(name='readonly')
        with self.assertRaises(DatabaseException):
            self.database.insert([m])

    def test_create_readonly_table(self):
        """Creating a table for ReadOnlyModel raises ``DatabaseException``."""
        with self.assertRaises(DatabaseException):
            self.database.create_table(ReadOnlyModel)

    def test_drop_readonly_table(self):
        """Dropping the ReadOnlyModel table raises ``DatabaseException``."""
        with self.assertRaises(DatabaseException):
            self.database.drop_table(ReadOnlyModel)

    def _sample_buffer_data(self):
        """Yield a PersonBuffer built from each entry of the module-level ``data``."""
        for entry in data:
            yield PersonBuffer(**entry)