Example #1
    def create_table_sql(self, db):
        name = self.__class__.__name__
        if self.replica_name:
            name = 'Replicated' + name

        # ClickHouse 1.1.54310 introduced custom partitioning keys:
        # https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
        # Check the server version and use the new syntax if available
        if db.server_version >= (1, 1, 54310):
            partition_sql = "PARTITION BY %s ORDER BY %s" \
                            % ('(%s)' % comma_join(self.partition_key), '(%s)' % comma_join(self.order_by))

            if self.sampling_expr:
                partition_sql += " SAMPLE BY %s" % self.sampling_expr

            partition_sql += " SETTINGS index_granularity=%d" % self.index_granularity

        elif not self.date_col:
            # Can't import it globally due to circular import
            from infi.clickhouse_orm.database import DatabaseException
            raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. "
                                    "Please update your server or use date_col syntax."
                                    "https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/")
        else:
            partition_sql = ''

        params = self._build_sql_params(db)
        return '%s(%s) %s' % (name, comma_join(params), partition_sql)
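
A usage sketch for the method above (hedged: the keyword arguments follow infi.clickhouse_orm's documented MergeTree signature, `db` is assumed to be a Database connected to a server newer than 1.1.54310, and the column names are illustrative):

from infi.clickhouse_orm.engines import MergeTree

# A MergeTree engine with a custom partition key; on a server >= 1.1.54310
# create_table_sql() renders the new PARTITION BY / ORDER BY syntax.
engine = MergeTree(order_by=('id',), partition_key=('toYYYYMM(date)',))
# print(engine.create_table_sql(db))
# -> MergeTree() PARTITION BY (toYYYYMM(date)) ORDER BY (id) SETTINGS index_granularity=8192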
Example #2
def __init__(self,
             db_name,
             db_url='http://localhost:8123/',
             username=None,
             password=None,
             readonly=False,
             autocreate=True,
             timeout=60,
             verify_ssl_cert=True,
             ssl_cert=None,
             log_statements=False):
    '''
    Initializes a database instance. Unless it's readonly, the database will be
    created on the ClickHouse server if it does not already exist.

    - `db_name`: name of the database to connect to.
    - `db_url`: URL of the ClickHouse server.
    - `username`: optional connection credentials.
    - `password`: optional connection credentials.
    - `readonly`: use a read-only connection.
    - `autocreate`: automatically create the database if it does not exist (unless in readonly mode).
    - `timeout`: the connection timeout in seconds.
    - `verify_ssl_cert`: whether to verify the server's certificate when connecting via HTTPS.
    - `ssl_cert`: certificate and key when connecting via HTTPS.
    - `log_statements`: when True, all database statements are logged.
    '''
    self.username = username
    self.password = password
    self.db_name = db_name
    self.db_url = db_url
    self.readonly = False  # set to True further down, once the readonly checks pass
    self.timeout = timeout
    self.request_session = requests.Session()
    self.request_session.verify = verify_ssl_cert
    self.request_session.cert = ssl_cert
    if username:
        self.request_session.auth = (username, password or '')
    self.log_statements = log_statements
    self.settings = {}
    self.db_exists = False
    self.db_exists = self._is_existing_database()
    if readonly:
        if not self.db_exists:
            raise DatabaseException(
                'Database does not exist, and cannot be created under readonly connection'
            )
        self.connection_readonly = self._is_connection_readonly()
        self.readonly = True
    elif autocreate and not self.db_exists:
        self.create_database()
    self.server_version = self._get_server_version()
    # Versions 1.1.53981 and below don't have a timezone function
    self.server_timezone = (self._get_server_timezone() if
                            self.server_version > (1, 1, 53981) else pytz.utc)
    # Versions 19.1.16 and above support codec compression
    self.has_codec_support = self.server_version >= (19, 1, 16)
    # Versions 19.0 and above support LowCardinality
    self.has_low_cardinality_support = self.server_version >= (19, 0)
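
A minimal connection sketch for this initializer (the database name and credentials are illustrative):

from infi.clickhouse_orm.database import Database

# Connects to a local server; because autocreate defaults to True and
# readonly to False, `my_db` is created if it does not exist yet.
db = Database('my_db', db_url='http://localhost:8123/',
              username='default', password='', log_statements=True)
print(db.server_version)   # e.g. (19, 1, 16)
print(db.server_timezone)  # reported by the server, or pytz.utc on very old versions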
Example #3
File: database.py Project: qvp/aiochorm
    async def insert_async(self, model_instances, batch_size=1000):
        '''
        Insert records into the database.

        - `model_instances`: any iterable containing instances of a single model class.
        - `batch_size`: number of records to send per chunk (use a lower number if your records are very large).
        '''
        from six import next
        i = iter(model_instances)
        try:
            first_instance = next(i)
        except StopIteration:
            return  # model_instances is empty
        first_instance.set_database(self)
        model_class = first_instance.__class__

        if first_instance.is_read_only() or first_instance.is_system_model():
            raise DatabaseException(
                "You can't insert into read-only or system tables")

        fields_list = ','.join(
            ['`%s`' % name for name in first_instance.fields(writable=True)])

        def gen():
            values = list()
            values.append(first_instance.to_dict(include_readonly=False))
            # Collect records in batches of batch_size; the first record
            # is already in `values`, so the counter starts at 1
            lines = 1
            for instance in i:
                instance.set_database(self)
                values.append(instance.to_dict(include_readonly=False))
                lines += 1
                if lines >= batch_size:
                    # Return the current batch of lines
                    yield values
                    # Start a new batch
                    values = list()
                    lines = 0
            # Return any remaining lines in partial batch
            if lines:
                yield values

        for batch in gen():
            query = self._substitute(
                'INSERT INTO $table (%s) VALUES ' % fields_list, model_class)
            await self._send(query, settings=batch)
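
A hedged usage sketch for the coroutine above (`Event` stands in for any Model subclass and `db` for an initialized aiochorm Database; both are illustrative):

import asyncio

async def load_events(db):
    # Any iterable works, including a generator; insert_async slices it
    # into batches and awaits one INSERT per batch.
    events = (Event(name='event_%d' % n) for n in range(5000))
    await db.insert_async(events, batch_size=1000)

# asyncio.run(load_events(db))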
Example #4
    def insert_tuples(self,
                      model_class: Type['ClickHouseModel'],
                      model_tuples: Iterable[tuple],
                      batch_size: Optional[int] = None,
                      formatted: bool = False) -> None:
        """
        Inserts model_class namedtuples
        :param model_class: ClickHouse model, namedtuples are made from
        :param model_tuples: An iterable of tuples to insert
        :param batch_size: Size of batch
        :param formatted: If flag is set, tuples are expected to be ready to insert without calling field.to_db_string
        :return: None
        """
        tuples_iterator = iter(model_tuples)

        try:
            first_tuple = next(tuples_iterator)
        except StopIteration:
            return  # model_tuples is empty

        if model_class.is_read_only() or model_class.is_system_model():
            raise DatabaseException(
                "You can't insert into read-only or system tables")

        fields_list = ','.join('`%s`' % name for name in first_tuple._fields)
        fields_dict = model_class.fields(writable=True)
        statsd_key = "%s.inserted_tuples.%s" % (config.STATSD_PREFIX,
                                                model_class.__name__)

        query = 'INSERT INTO `%s`.`%s` (%s) FORMAT TabSeparated\n' \
                % (self.db_name, model_class.table_name(), fields_list)
        query_enc = query.encode('utf-8')

        def tuple_to_csv(tup):
            # Despite the name, this emits one TabSeparated line, matching
            # the FORMAT TabSeparated clause of the query above
            if formatted:
                str_gen = (getattr(tup, field_name)
                           for field_name in first_tuple._fields)
            else:
                str_gen = (fields_dict[field_name].to_db_string(getattr(tup, field_name), quote=False)
                           for field_name in first_tuple._fields)

            return '%s\n' % '\t'.join(str_gen)

        def gen():
            buf = BytesIO()
            buf.write(query_enc)
            buf.write(tuple_to_csv(first_tuple).encode('utf-8'))

            # Collect lines in batches of batch_size
            lines = 1
            for t in tuples_iterator:
                buf.write(tuple_to_csv(t).encode('utf-8'))

                lines += 1
                if batch_size is not None and lines >= batch_size:
                    # Return the current batch of lines
                    statsd.incr(statsd_key, lines)
                    yield buf.getvalue()
                    # Start a new batch
                    buf = BytesIO()
                    buf.write(query_enc)
                    lines = 0

            # Return any remaining lines in partial batch
            if lines:
                statsd.incr(statsd_key, lines)
                yield buf.getvalue()

        # Send each generated batch to the server, timing every request
        for data in gen():
            with statsd.timer(statsd_key):
                logger.debug('django-clickhouse: insert tuple: %s' % data)
                self._send(data)
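
A hedged sketch of calling this method (`MyModel`, its fields, and `db` are illustrative; the namedtuple's fields must match the model's writable fields):

from collections import namedtuple

# Field names must line up with MyModel's writable fields
MyRow = namedtuple('MyRow', ('id', 'value'))
rows = (MyRow(id=n, value=str(n)) for n in range(100000))

# Streams the rows as TabSeparated data, one INSERT per 10000 tuples;
# with batch_size=None everything goes in a single INSERT
db.insert_tuples(MyModel, rows, batch_size=10000)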