def create_adver_property_table(self, table_name):
    client = Client(host='localhost')
    DDL_sql = """
        CREATE TABLE IF NOT EXISTS TEST.{0}
        (
            STATS_DTTM String,
            ADVER_ID Nullable(String),
            CTGR_SEQ_3 Nullable(String),
            CTGR_NM_3 Nullable(String),
            CTGR_SEQ_2 Nullable(String),
            CTGR_NM_2 Nullable(String),
            CTGR_SEQ_1 Nullable(String),
            CTGR_NM_1 Nullable(String)
        )
        ENGINE = MergeTree
        PARTITION BY STATS_DTTM
        ORDER BY STATS_DTTM
        SETTINGS index_granularity=8192
    """.format(table_name)
    result = client.execute(DDL_sql)
    return result
def test_ssl(self):
    c = Client.from_url(
        'clickhouses://host?'
        'verify=false&'
        'ssl_version=PROTOCOL_SSLv23&'
        'ca_certs=/tmp/certs&'
        'ciphers=HIGH:-aNULL:-eNULL:-PSK:RC4-SHA:RC4-MD5')

    assert c.connection.ssl_options == {
        'ssl_version': ssl.PROTOCOL_SSLv23,
        'ca_certs': '/tmp/certs',
        'ciphers': 'HIGH:-aNULL:-eNULL:-PSK:RC4-SHA:RC4-MD5'
    }
def dbms_sync_clickhouse_schema(row):
    ignored_schemas = ('_temporary_and_external_tables', 'system', 'default')
    query = f"select name from system.databases where name not in {ignored_schemas}"
    config = {
        'host': row.host,
        'port': row.port,
        'database': 'default',
        'connect_timeout': 5,
        'send_receive_timeout': 5,
    }
    # The user must already be created in ClickHouse.
    config.update(REOMOTE_USER)
    cnx = Client(**config)
    result = cnx.execute(query)
    for i in result:
        schema = i[0]
        models.DbSchemas.objects.update_or_create(
            cid_id=row.id, schema=schema, defaults={'schema': schema})
    cnx.disconnect()
def __init__(self, credentials):
    self.host = credentials['host']
    self.db = credentials['db']
    self.ch = sa.create_engine(
        f"clickhouse://*****:*****@{credentials['host']}:8123/{credentials['db']}"
    )
    self.client = Client(host=credentials['host'],
                         password=credentials['password'])
    self.client.execute('CREATE DATABASE IF NOT EXISTS amdattds;')
    self.pandahouse_host = f"http://{credentials['host']}:8123"
    self.conn = {
        'host': self.pandahouse_host,
        'database': self.db,
        'password': credentials['password']
    }
    self.session = make_session(self.ch)
    metadata = MetaData(bind=self.ch)
    metadata.reflect(bind=self.ch)
    self.connection = self.ch.connect()
def _create_conn(self) -> Client:
    return Client(
        host=self.host,
        port=self.port,
        user=self.user,
        password=self.password,
        database=self.database,
        connect_timeout=self.connect_timeout,
        send_receive_timeout=self.send_receive_timeout,
        settings=self.client_settings,
    )
def create_entire_log_table(self, table_name):
    client = Client(host='localhost')
    DDL_sql = """
        CREATE TABLE IF NOT EXISTS {0}.{1}
        (
            mediaId String,
            inventoryId String,
            frameId String,
            logType String,
            adType String,
            adProduct String,
            adCampain String,
            adverId String,
            productCode String,
            cpoint Decimal(13, 2),
            mpoint Decimal(13, 2),
            auid String,
            remoteIp String,
            platform String,
            device String,
            browser String,
            createdDate DateTime default now(),
            freqLog Nullable(String),
            tTime Nullable(String),
            kno Nullable(String),
            kwrdSeq Nullable(String),
            gender Nullable(String),
            age Nullable(String),
            osCode Nullable(String),
            price Nullable(Decimal(13, 2)),
            frameCombiKey Nullable(String)
        )
        engine = MergeTree()
        PARTITION BY toYYYYMMDD(createdDate)
        PRIMARY KEY (mediaId, inventoryId, adverId)
        ORDER BY (mediaId, inventoryId, adverId)
        SAMPLE BY mediaId
        TTL createdDate + INTERVAL 90 DAY
        SETTINGS index_granularity = 8192
    """.format(self.local_clickhouse_db_name, table_name)
    result = client.execute(DDL_sql)
    return result
def get_analyzed_info(shukach_ids):
    if len(shukach_ids) == 0:
        return []
    client = Client(settings.CLICKHOUSE_HOST,
                    database=settings.CLICKHOUSE_DB,
                    user=settings.CLICKHOUSE_USER,
                    password=settings.CLICKHOUSE_PASSWORD)
    query = 'select UrlId, Platform, Browser, Country, Age, Gender, Income, ' \
            'count(distinct IntVisKey), Sum(Views) ' \
            'from admixer.UrlStat ' \
            'where UrlId in (%s) ' \
            'Group by UrlId, Platform, Browser, Country, Age, Gender, Income' \
            % (",".join("'%d'" % item for item in shukach_ids))
    response = client.execute(query)
    keys = ('url_id', 'platform', 'browser', 'region', 'age', 'gender',
            'income', 'uniques', 'views')
    results = []
    for row in response:
        item = dict(zip(keys, row))
        item['url_id'] = int(row[0])
        results.append(item)
    return results
def _run_schema(conn: Client, schema: Schema) -> None:
    if not isinstance(schema, TableSchema):
        return
    clickhouse_table = schema.get_local_table_name()

    local_schema = get_local_schema(conn, clickhouse_table)

    migrations = schema.get_migration_statements()(clickhouse_table, local_schema)
    for statement in migrations:
        logger.info(f"Executing migration: {statement}")
        conn.execute(statement)

    # Refresh after alters
    refreshed_schema = get_local_schema(conn, clickhouse_table)

    # Warn user about any *other* schema diffs
    differences = schema.get_column_differences(refreshed_schema)
    for difference in differences:
        logger.warn(difference)
def __conn_clickhouse(cls):
    try:
        client = Client(cls.clickhouse_host,
                        user=cls.clickhouse_user,
                        password=cls.clickhouse_passwd,
                        database=cls.clickhouse_db)
    except Exception as err:
        logging.error("Filter: error connecting to the ClickHouse database.")
        sys.exit(1)
    return client
def get_local_schema(
        conn: Client, table_name: str) -> Mapping[str, ColumnType[MigrationModifiers]]:
    return {
        column_name: _get_column(column_type, default_type, default_expr, codec_expr)
        for column_name, column_type, default_type, default_expr, _comment, codec_expr in [
            cols[:6]
            for cols in conn.execute("DESCRIBE TABLE %s" % table_name).results
        ]
    }
def __init__(self, user='******', password='', host='localhost', port=9000,
             db_name=None):
    self.client = Client.from_url('clickhouse://{}:{}@{}:{}'.format(
        user, password, host, port))
    self._test_connection()
    self.name = db_name
    self._create_database()
    self._create_tables()
def test_opentelemetry(self):
    c = Client.from_url(
        'clickhouse://host?opentelemetry_traceparent='
        '00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-00')
    self.assertEqual(
        c.connection.context.client_settings['opentelemetry_traceparent'],
        '00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-00')
    self.assertEqual(
        c.connection.context.client_settings['opentelemetry_tracestate'],
        '')

    c = Client.from_url(
        'clickhouse://host?opentelemetry_traceparent='
        '00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-00&'
        'opentelemetry_tracestate=state')
    self.assertEqual(
        c.connection.context.client_settings['opentelemetry_traceparent'],
        '00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-00')
    self.assertEqual(
        c.connection.context.client_settings['opentelemetry_tracestate'],
        'state')
def create_shop_property_table(self, table_name):
    client = Client(host='localhost')
    DDL_sql = """
        CREATE TABLE IF NOT EXISTS TEST.{0}
        (
            STATS_DTTM String,
            ADVER_ID Nullable(String),
            PCODE Nullable(String),
            PRODUCT_CATE_NO Nullable(String),
            FIRST_CATE Nullable(String),
            SECOND_CATE Nullable(String),
            THIRD_CATE Nullable(String),
            PNM Nullable(String),
            PRICE Nullable(UInt16)
        )
        ENGINE = MergeTree
        PARTITION BY STATS_DTTM
        ORDER BY STATS_DTTM
        SETTINGS index_granularity=8192
    """.format(table_name)
    result = client.execute(DDL_sql)
    return result
def painting():
    client = Client('localhost')  # Connect to the server.
    nums = []
    for i in range(count_of_nums):
        nums.append(get_data(i + 1, client))
    print(*nums, sep='\n')
    graph(nums)
    if sys.flags.interactive != 1 or not hasattr(qt.QtCore, 'PYQT_VERSION'):
        qt.QtGui.QApplication.exec_()
def get_query_timing_info(random_id: str, conn: Client) -> Tuple[str, Dict]:
    conn.execute("SYSTEM FLUSH LOGS")
    results = conn.execute(
        """
        SELECT query_id, event_time, query_duration_ms, read_rows,
               formatReadableSize(read_bytes) as read_size, result_rows,
               formatReadableSize(result_bytes) as result_size,
               formatReadableSize(memory_usage) as memory_usage
        FROM system.query_log
        WHERE query NOT LIKE '%%query_log%%'
          AND match(query, %(expr)s)
          AND type = 'QueryFinish'
        LIMIT 1
        """,
        {"expr": f"analyze_query:{random_id}"},
    )
    return (
        results[0][0],
        dict(
            zip(
                [
                    "query_id",
                    "event_time",
                    "query_duration_ms",
                    "read_rows",
                    "read_size",
                    "result_rows",
                    "result_size",
                    "memory_usage",
                ],
                results[0],
            )
        ),
    )
def delete_data_in_ch(self):
    client = Client(**CLICKHOUSE_DB_INFO)
    delete_data_last = {}
    if not self.delete_data_list:
        self.time_cost_last_delete = 0
    else:
        logger.info("begin group data with primary_key_values to each table")
        for i in self.delete_data_list:
            delete_data_last.setdefault(i['table_name'],
                                        []).append(i['primary_key_values'])
        logger.info("group each table data for one sql")
        for table_name, values in delete_data_last.items():
            sql_cur = {}
            for v in values:
                for k, vv in v.items():
                    sql_cur.setdefault(k, []).append(vv)
            last_delete = ' and '.join(
                ['%s in %s' % (k, tuple(v)) for k, v in sql_cur.items()])
            exec_sql = "alter table %s delete where %s ;" % (table_name,
                                                             last_delete)
            begin_delete_time = int(time.time())
            logger.info("begin execute: exec_sql:%s" % exec_sql)
            client.execute(exec_sql)
            logger.info("end execute")
        client.disconnect()
        self.time_cost_last_delete = int(time.time()) - begin_delete_time
class DBConnection:
    def __init__(self, clickhouse_host=None, clickhouse_name=None,
                 dbuser=None, dbpass=None, db_query=None):
        self.db_query = db_query
        self.clickhouse_client = Client(
            host=clickhouse_host,
            database=clickhouse_name,
            user=dbuser,
            password=dbpass
        )
        try_conn = self._connection_test()
        if try_conn:
            raise ConnectionError(try_conn)
        self.connection_status = False

    def make_connection(self):
        try:
            self.clickhouse_client.connection.connect()
        except Exception as error:
            logger.error(error)
            return False
        self.connection_status = True
        return True

    def _connection_test(self):
        try:
            self.clickhouse_client.connection.connect()
        except errors.Error as error:
            logger.error(exception_to_logger(error))
            return "Error of database connection."
        self.clickhouse_client.disconnect()
        return None

    def send_request(self, trace=False):
        response_status = False
        try:
            if isinstance(self.db_query, tuple):
                response = self.clickhouse_client.execute(
                    self.db_query[0], self.db_query[1]
                )
            else:
                response = self.clickhouse_client.execute(self.db_query)
            response_status = True
        except Exception as error:
            logger.error(exception_to_logger(error))
            response = json_str_error("Error in sql query!")
            if trace:
                print_statusbar([(error, 'd',)])
        return response_status, response

    def disconnect(self):
        self.clickhouse_client.disconnect()
        self.connection_status = False
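# A minimal usage sketch for the DBConnection wrapper above. The host,
# credentials and query below are placeholders, not values from the original code.
db = DBConnection(clickhouse_host='localhost', clickhouse_name='default',
                  dbuser='default', dbpass='', db_query='SELECT 1')
ok, response = db.send_request()
if ok:
    print(response)
db.disconnect()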
class Listener(AnalystService_pb2_grpc.AnalystServiceServicer):
    """The listener implements the rpc call as described in the .proto file"""

    def __init__(self):
        self.counter = 0
        self.last_print_time = time.time()
        self.client = Client(host='78.140.223.19', password='******')

    def __str__(self):
        return self.__class__.__name__

    def Analyse(self, request, context):
        data = request.message
        print('message-', request.message)
        critical_v = self.client.execute('SELECT * FROM test.critical')[0]
        print('criticals- ', critical_v)
        result = []
        flag = False
        counter = 0
        for i in range(len(data[:-1])):
            if data[i] < critical_v[i]:
                result.append(1)
                flag = True
            else:
                result.append(0)
        result.append(data[-1])
        print('Chats - ', chatids)
        if flag:
            self.client.execute(
                'INSERT INTO test.warnings '
                '(Pressure, Humidity, TemperatureR, TemperatureA, pH, FlowRate, CO, EventTime) '
                f'VALUES ({",".join(map(str, result))})')
            for i in chatids:
                bot.send_message(
                    i, 'Warning on sensors: ' + ",".join(
                        map(str, [names[j] for j in range(len(names)) if result[j] == 1])))
                print('message sent to ', i)
        return AnalystService_pb2.Out()
def create_table(config: Dict[str, Any]) -> None:
    while True:
        try:
            client = Client(config['db']['host'], config['db']['port'])
            client.execute(
                '''CREATE TABLE IF NOT EXISTS LOB(
                       symbol String,
                       event_date DateTime,
                       update_id UInt64,
                       price Float64,
                       amount Float64,
                       is_bid UInt8
                   ) ENGINE = MergeTree()
                   PARTITION BY toYYYYMM(event_date)
                   ORDER BY (symbol, event_date, update_id, price)'''
            )
        except NetworkError:
            logger.error('Clickhouse DB connection error. Retry after 5s...')
            sleep(5)
        else:
            break
def _create_conn(self, use_fallback_host: bool = False) -> Client:
    if use_fallback_host:
        (fallback_host, fallback_port) = self.get_fallback_host()

    return Client(
        host=(self.host if not use_fallback_host else fallback_host),
        port=(self.port if not use_fallback_host else fallback_port),
        user=self.user,
        password=self.password,
        database=self.database,
        connect_timeout=self.connect_timeout,
        send_receive_timeout=self.send_receive_timeout,
        settings=self.client_settings,
    )
def client_ck(host=None, port=None, user=None, password=None, database=None,
              collection=None):
    """Connect to the ClickHouse database."""
    conn = Client(host=host, port=port, user=user, password=password,
                  database=database)
    return conn
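# A hedged usage example for client_ck(); the connection parameters are placeholders.
conn = client_ck(host='localhost', port=9000, user='default', password='',
                 database='default')
print(conn.execute('SELECT version()'))
conn.disconnect()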
class Max1(AggregationPrimitive):
    """Finds the maximum non-null value of a numeric feature."""
    name = "max1"
    input_types = [Numeric]
    return_type = None
    # max_stack_depth = 1
    stack_on_self = False
    client = Client('localhost')

    def get_function(self):
        def click_house_max(x):
            # Wrap the column in max() so the grouped query aggregates it.
            agg_fun = "max(" + x + ")"
            return self.client.execute(
                "select %s from bureau group by SK_ID_CURR " % (agg_fun))
        return click_house_max
class Table:
    """
    Representation of ClickHouse table.
    Before use, make sure that docker container is running (more in README.md).
    """
    client = Client(host='localhost')

    def __init__(self, name, attributes):
        self.table_name = name
        self.rows = len(self.get_values())
        self.cols = len(attributes)

    def get_values(self):
        return self.client.execute(
            f'select * from {self.table_name} order by name')

    def get_by_name(self, name):
        return self.client.execute(
            f"select * from {self.table_name} where name='{name}'")

    def update_by_name(self, name, value_to_update=None):
        """
        :param name: filter to search for row
        :param value_to_update: tuple of 2 elements used in the UPDATE assignment,
            e.g. ('duration', '2') will set duration to 2.
            Note! if your value (second arg.) is a string, then use double quotes:
            ('name', "'my task'"), since by default it doesn't put quotes in the query.
        """
        assert value_to_update is not None, 'pls provide value to update'
        assert type(value_to_update) == tuple
        assert len(value_to_update) == 2
        self.client.execute(
            f"alter table {self.table_name} "
            f"update {value_to_update[0]} = {value_to_update[1]} "
            f"where name = '{name}'")

    @staticmethod
    def query(text):
        return Table.client.execute(text)

    def add(self, **values):
        self.client.execute(f'insert into {self.table_name} values', [values])
        self.rows += 1

    def delete_by_name(self, name):
        self.client.execute(f"alter table {self.table_name} "
                            f"delete where name = '{name}'")
        self.rows -= 1
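# A minimal usage sketch of the Table class above. The 'tasks' table and its
# columns are hypothetical; it assumes the table already exists and has a
# 'name' column, since get_values() orders by name.
tasks = Table('tasks', ['name', 'duration'])
tasks.add(name='my task', duration=1)
tasks.update_by_name('my task', ('duration', '2'))
print(tasks.get_by_name('my task'))
tasks.delete_by_name('my task')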
def create_new_local_table(self, table_name):
    client = Client(host='localhost')
    DDL_sql = """
        CREATE TABLE IF NOT EXISTS {0}.{1}
        (
            LOG_DTTM DateTime('Asia/Seoul'),
            RANDOM_SAMPLE Float32,
            STATS_DTTM UInt32,
            STATS_HH UInt8,
            STATS_MINUTE UInt8,
            MEDIA_SCRIPT_NO String,
            SITE_CODE String,
            ADVER_ID String,
            REMOTE_IP String,
            ADVRTS_PRDT_CODE Nullable(String),
            ADVRTS_TP_CODE Nullable(String),
            PLTFOM_TP_CODE Nullable(String),
            PCODE Nullable(String),
            BROWSER_CODE Nullable(String),
            FREQLOG Nullable(String),
            T_TIME Nullable(String),
            KWRD_SEQ Nullable(String),
            GENDER Nullable(String),
            AGE Nullable(String),
            OS_CODE Nullable(String),
            FRAME_COMBI_KEY Nullable(String),
            CLICK_YN UInt8,
            BATCH_DTTM DateTime
        )
        ENGINE = MergeTree
        PARTITION BY (STATS_DTTM, STATS_MINUTE)
        PRIMARY KEY (STATS_DTTM, STATS_MINUTE)
        ORDER BY (STATS_DTTM, STATS_MINUTE)
        SAMPLE BY (STATS_MINUTE)
        TTL BATCH_DTTM + INTERVAL 90 DAY
        SETTINGS index_granularity=8192
    """.format(self.local_clickhouse_db_name, table_name)
    result = client.execute(DDL_sql)
    return True
class Check(object):
    def __init__(self, *args, **kwargs):
        self.conn = Client(*args, **kwargs)
        self.code = Code()

    def execute(self, *args, **kwargs) -> List[tuple]:
        """
        Wrapper to execute ClickHouse SQL
        """
        if len(args) == 1:
            logger.info('Execute query: {}'.format(args[0]))
        elif len(args) >= 2:
            logger.info('Execute query: {}'.format(
                self.conn.substitute_params(args[0], args[1])))
        return self.conn.execute(*args, **kwargs)

    def execute_dict(self, *args, **kwargs) -> List[dict]:
        """
        Wrapper around execute() to return a list of rows as dicts
        """
        kwargs['with_column_types'] = True
        rows, columns = self.execute(*args, **kwargs)
        result = [{columns[i][0]: v for i, v in enumerate(r)} for r in rows]
        return result

    def exit(self, message: str) -> ExitStruct:
        message = self.code.name + ': ' + message
        return self.code.current, message

    def check_config(self, config: dict, keys: set):
        """
        Checks that all mandatory keys are present in the config dict
        """
        keys_in_config = config.keys() & keys
        if keys_in_config != keys:
            raise KeyError('Not all of {} are present in config: {}'.format(
                keys, config))
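# A minimal usage sketch of the Check wrapper above, assuming a local
# ClickHouse server; the query is illustrative only.
check = Check(host='localhost')
for row in check.execute_dict('SELECT name, engine FROM system.databases'):
    print(row['name'], row['engine'])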
def check_table_exists_on_clickhouse(self):
    """
    Check whether the MySQL tables that need to be synced exist in ClickHouse.
    :return:
    """
    client = Client(**CLICKHOUSE_DB_INFO)
    not_in_clickhouse_tables = []
    if not self.rsnyc_tables:
        messages = "no tables to sync"
        logger.error(messages)
        return False
    else:
        try:
            for i in self.rsnyc_tables:
                table_schema = i['table_schema']
                table_name = i['table_name']
                check_sql = """select count(*) countnum from system.tables where database= '%s' and name='%s';""" % (
                    table_schema, table_name)
                countnum = client.execute(check_sql)
                if countnum[0][0] == 0:
                    not_in_clickhouse_tables.append(table_schema + '.' + table_name)
        except Exception as error:
            messages = "clickhouse failed: %s" % (str(error))
            logger.error(messages)
        finally:
            if client:
                client.disconnect()
        if not_in_clickhouse_tables:
            messages = "tables: %s not in clickhouse" % (
                ','.join(not_in_clickhouse_tables))
            logger.error(messages)
            return False
        else:
            messages = "tables all in clickhouse"
            logger.info(messages)
            return True
class pandasConnector(object):
    def __init__(self, host, user='******', password='', db=''):
        self.db = db
        self.conn = Client(host=host, user=user, password=password)
        self.useDB()
        self.checkTables()

    def useDB(self):
        self.dbCorrect = True
        try:
            useDb = self.conn.execute("use " + str(self.db), columnar=True)
        except:
            self.dbCorrect = False
            print("Wrong DB!")

    def checkTables(self):
        if self.dbCorrect:
            self.tables = self.conn.execute("show tables", columnar=True)
            if len(self.tables) > 0:
                self.tables = list(self.tables[0])

    def read_sql_query(self, query, tableName):
        dataDataFrame = pd.DataFrame()
        if self.dbCorrect:
            if len(self.tables) > 0:
                if tableName in self.tables:
                    dataList = self.conn.execute(query, columnar=True,
                                                 with_column_types=True)
                    if len(dataList) > 0:
                        dataAll = dataList[0]
                        if len(dataAll) > 0:
                            columns = [x[0] for x in dataList[1]]
                            dataDataFrame = pd.DataFrame(dataAll).T
                            dataDataFrame.columns = columns
                else:
                    print("Wrong table name!")
        return dataDataFrame
def get_dfc():
    """get the pandas data frame, for now settings kinda arbitrary/unsystematic"""
    client = Client(host='localhost')
    # print(time_periods[0], time_periods[-1], len(time_periods))

    vrbls = ['dncblt', 'gender', 'timb_brt', 'tonal', 'voice', 'mood_acoustic',
             'mood_aggressive', 'mood_electronic', 'mood_happy', 'mood_party',
             'mood_relaxed', 'mood_sad']

    tprd = [date(2011, 1, 1), date(2011, 3, 31)]
    d1 = tprd[0].strftime('%Y-%m-%d')
    d2 = tprd[1].strftime('%Y-%m-%d')

    d1_dt = datetime.strptime(d1, '%Y-%m-%d')
    d2_dt = datetime.strptime(d2, '%Y-%m-%d')
    base_dt = datetime(1970, 1, 1)
    d1_int = (d1_dt - base_dt).days
    d2_int = (d2_dt - base_dt).days

    # tp_id = time_periods.index(tprd)
    tp_clm = d1 + ' -- ' + d2

    min_cnt = 20
    min_weight = 20
    min_rel_weight = 0.2
    min_tag_aprnc = 0.2
    min_inst_cnt = 20
    min_unq_artsts = 10
    usr_dedcgs = 6
    tag_plcnt = 10
    unq_usrs = 10
    max_propx1 = 0.5
    max_propx2 = 0.7
    ptn = '_all'

    dfc = get_dfs(vrbls, min_cnt, min_weight, min_rel_weight, min_tag_aprnc,
                  min_unq_artsts, max_propx1, max_propx2, d1, d2, ptn,
                  usr_dedcgs, tag_plcnt, unq_usrs, client, pd)
    return dfc
def create_media_property_table(self, table_name):
    client = Client(host='localhost')
    DDL_sql = """
        CREATE TABLE IF NOT EXISTS TEST.{0}
        (
            STATS_DTTM String,
            MEDIA_SCRIPT_NO Nullable(String),
            MEDIASITE_NO Nullable(String),
            MEDIA_ID Nullable(String),
            SCRIPT_TP_CODE Nullable(String),
            MEDIA_SIZE_CODE Nullable(String),
            ENDING_TYPE Nullable(String),
            M_BACON_YN Nullable(String),
            ADVRTS_STLE_TP_CODE Nullable(String),
            MEDIA_CATE_INFO Nullable(String),
            MEDIA_CATE_NAME Nullable(String)
        )
        ENGINE = MergeTree
        PARTITION BY STATS_DTTM
        ORDER BY STATS_DTTM
        SETTINGS index_granularity=8192
    """.format(table_name)
    result = client.execute(DDL_sql)
    return result
def insert_db(filename, host, table, backup=False):
    logging.info("Writing data to a table %s..." % table)
    client = Client(host)
    td = [['DataTime', 'String', 'Float32', 'UInt32']]
    sql = 'INSERT INTO %s (dt, st, fl, ui) FORMAT VALUES ' % table
    with open(filename, newline='') as csvfile:
        read = csv.reader(csvfile, delimiter=',')
        next(read)
        for row in read:
            if backup:
                td.append(row)
            client.execute(
                sql,
                [{
                    'dt': datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S'),
                    'st': row[1],
                    'fl': numpy.float32(row[2]),
                    'ui': int(row[3])
                }]
            )
    logging.info("Done!")
    return td