def initial(self):
    if not self.nodatabase:
        if self.dbname in ['ck', 'clickhouse']:
            self.db = clickhouse_driver.Client(
                host=self._clickhouse_ip,
                port=self._clickhouse_port,
                user=self._clickhouse_user,
                password=self._clickhouse_password,
                database='qifi',
                settings={'insert_block_size': 100000000},
                compression=True)
            self.reload_ck()
        else:
            if self.model == "BACKTEST":
                self.db = pymongo.MongoClient(self.trade_host).quantaxis
            else:
                self.db = pymongo.MongoClient(self.trade_host).QAREALTIME
            self.reload()
    else:
        # non-database mode, no reload needed
        print('QIFIAccount is in non-database mode, intended for testing / secondary development')

    if self.pre_balance == 0 and self.balance == 0 and self.model != "REAL":
        self.log('Create new Account')
        self.create_simaccount()
    self.sync()

def __init__(
        self,
        factor_name="QAF_test",
        host=clickhouse_ip,
        port=clickhouse_port,
        user=clickhouse_user,
        password=clickhouse_password,
):
    self.client = clickhouse_driver.Client(
        host=host, port=port, user=user, password=password, database='factor')
    self.client.execute(
        """CREATE TABLE IF NOT EXISTS `factor`.`factormetadata` (
               factorname String,
               create_date Date,
               update_date Date,
               version Float32,
               description String DEFAULT 'None')
           ENGINE=ReplacingMergeTree()
           ORDER BY (factorname)
           SETTINGS index_granularity=8192""")
    self.factor_name = factor_name
    self.description = 'None'
    self.finit()
    if not self.check_if_exist():
        print('start register')
        self.register()
        self.init_database()

def get_click(database):
    '''get a clickhouse client object for the required database'''
    return clickhouse_driver.Client(treemap_config.CLICK_HOST,
                                    database=database,
                                    compression=True)

def __init__(self, settings: Dict):
    self.settings = settings
    self._client = clickhouse_driver.Client(
        host=settings.get("host"),
        port=settings.get("port"),
        user=settings.get("user"),
        password=settings.get("password") or "",
    )

def save_to_clickhouse(**kwargs):
    ti = kwargs['ti']
    j = ti.xcom_pull(task_ids='load_from_esearch')['res']
    # cast boolean fields to ints before inserting
    j = [dict((k, int(v) if isinstance(v, bool) else v) for k, v in _.items())
         for _ in j]
    client = clickhouse_driver.Client('localhost')
    client.execute('SET input_format_skip_unknown_fields=1;')
    client.execute('INSERT INTO clicks VALUES', j)
    return "Total clicks added: " + str(len(j))

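# Hedged sketch (not from the original sources): the pattern above relies on
# clickhouse_driver treating the second argument of execute() on an
# "INSERT ... VALUES" query as the rows to insert; dict rows are matched to
# columns by name. Table and column names below are assumptions for
# illustration only.
import clickhouse_driver

demo = clickhouse_driver.Client(host='localhost')
demo.execute('CREATE TABLE IF NOT EXISTS clicks_demo (id UInt32, clicked UInt8) ENGINE = Memory')
demo.execute('INSERT INTO clicks_demo (id, clicked) VALUES',
             [{'id': 1, 'clicked': 1}, {'id': 2, 'clicked': 0}])
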
def __init__(self, host="127.0.0.1", port="9000", user="******", password=""): self._client = clickhouse_driver.Client(host=host, port=port, user=user, password=password)
def __init__(self, host, port, password=None):
    if password is None:
        password = '******'
    try:
        self.__client = clickhouse_driver.Client(host, port, password=password)
    except Exception:
        print("Cannot connect to database.")

def __init__(self, username, trading_day):
    self.client = clickhouse_driver.Client(
        database='qifi',
        host='localhost',
        port=9000,
        user='******',
        settings={'insert_block_size': 100000000},
        compression=True)
    self.qifi_id = self.get_qifiid(username, trading_day)

def connect(self):
    if self._client is not None:
        self.log.debug('Clickhouse client already exists. Pinging Clickhouse Server.')
        try:
            if self.ping_clickhouse():
                self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=self._tags)
                return
            else:
                self.log.debug('Clickhouse connection ping failed. Attempting to reconnect')
                self._client = None
        except Exception as e:
            self.log.debug('Unexpected ping response from Clickhouse', exc_info=e)
            self.log.debug('Attempting to reconnect')
            self._client = None

    try:
        client = clickhouse_driver.Client(
            host=self._server,
            port=self._port,
            user=self._user,
            password=self._password,
            database=self._db,
            connect_timeout=self._connect_timeout,
            send_receive_timeout=self._read_timeout,
            sync_request_timeout=self._connect_timeout,
            compression=self._compression,
            secure=self._tls_verify,
            # Don't pollute the Agent logs
            settings={'calculate_text_stack_trace': False},
            # Make every client unique for server logs
            client_name='datadog-{}'.format(self.check_id),
        )
        client.connection.connect()
    except Exception as e:
        error = 'Unable to connect to ClickHouse: {}'.format(
            self._error_sanitizer.clean(self._error_sanitizer.scrub(str(e))))
        self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, message=error, tags=self._tags)

        # When an exception is raised in the context of another one, both will be printed.
        # To avoid this we set the context to None. https://www.python.org/dev/peps/pep-0409/
        raise_from(type(e)(error), None)
    else:
        self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=self._tags)
        self._client = client

def __init__(self, host=clickhouse_ip, port=clickhouse_port,
             user=clickhouse_user, password=clickhouse_password) -> None:
    self.client = clickhouse_driver.Client(host=host, port=port, user=user,
                                           password=password, database='factor')

def __init__(self, host: str, user: str, password: str, cluster_name: str = None):
    host, port = host.split(":")
    self._client = clickhouse_driver.Client(host=host, port=port,
                                            password=password, user=user)
    self.cluster_name = cluster_name

def get_services_logs(services, level=None):
    ch_url = os.environ.get('KRAKEN_CLICKHOUSE_URL', consts.DEFAULT_CLICKHOUSE_URL)
    o = urlparse(ch_url)
    ch = clickhouse_driver.Client(host=o.hostname)

    query = "select time,message,service,host,level,tool from logs "

    where = []
    params = {}
    for idx, s in enumerate(services):
        if s == 'all':
            continue
        param = 'service%d' % idx
        if '/' in s:
            s, t = s.split('/')
            tparam = 'tool%d' % idx
            where.append("(service = %%(%s)s and tool = %%(%s)s)" % (param, tparam))
            params[param] = s
            params[tparam] = t
        else:
            where.append("service = %%(%s)s" % param)
            params[param] = s

    if where:
        where = " or ".join(where)
        where = "where (" + where + ") "

    if level:
        level = level.upper()
        if level == 'ERROR':
            lq = "level = 'ERROR'"
        elif level == 'WARNING':
            lq = "level in ('WARNING', 'ERROR')"
        else:
            lq = "level in ('INFO', 'WARNING', 'ERROR')"
        if where:
            where += "and %s " % lq
        else:
            where = "where %s " % lq

    if where:
        query += where
    query += " order by time desc, seq desc limit 1000"

    rows = ch.execute(query, params)

    logs = []
    for r in reversed(rows):
        entry = dict(time=r[0], message=r[1], service=r[2], host=r[3], level=r[4], tool=r[5])
        logs.append(entry)

    return {'items': logs, 'total': len(logs)}, 200

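# Hedged sketch (not from the original sources): the %(service0)s placeholders
# built above are filled in by clickhouse_driver when a params dict is passed
# to execute(). Host, table and values below are assumptions for illustration
# only.
import clickhouse_driver

demo = clickhouse_driver.Client(host='localhost')
rows = demo.execute(
    "select time, message from logs where service = %(service0)s "
    "order by time desc limit 10",
    {'service0': 'server'})
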
def connect_clickhouse(ch_host='10.121.221.14'):
    ch_port = '9000'
    ch_db = 'backblaze'
    ch_user = '******'
    ch_pwd = ''
    client = clickhouse_driver.Client(host=ch_host, port=ch_port,
                                      database=ch_db, user=ch_user, password=ch_pwd)
    return client

def __init__(self, job_id):
    self.job_id = job_id
    self.query = "select message from logs where job = %s order by time asc, seq asc"
    self.query %= job_id
    ch_url = os.environ.get('KRAKEN_CLICKHOUSE_URL', consts.DEFAULT_CLICKHOUSE_URL)
    o = urlparse(ch_url)
    self.ch = clickhouse_driver.Client(host=o.hostname)
    self.logs_queue = Queue()
    self.finished = Event()
    self.worker = None

def get_databases():
    '''returns the set of available databases'''
    click = clickhouse_driver.Client(treemap_config.CLICK_HOST, compression=True)
    rows = click.execute('show databases')
    databases = set()
    for row in rows:
        databases.add(row[0])
    databases.remove('default')
    databases.remove('system')
    databases.remove('_temporary_and_external_tables')
    return databases

def __init__(self, host=clickhouse_ip, port=clickhouse_port, database='quantaxis',
             user=clickhouse_user, password=clickhouse_password):
    self.client = clickhouse_driver.Client(
        host=host,
        port=port,
        database=database,
        user=user,
        password=password,
        settings={'insert_block_size': 100000000},
        compression=True)

def get_job_logs(job_id, start=0, limit=200, order=None, internals=False, filters=None):  # pylint: disable=unused-argument
    if order not in [None, 'asc', 'desc']:
        abort(400, "incorrect order value: %s" % str(order))

    job = Job.query.filter_by(id=job_id).one()
    job_json = job.get_json()

    ch_url = os.environ.get('KRAKEN_CLICKHOUSE_URL', consts.DEFAULT_CLICKHOUSE_URL)
    o = urlparse(ch_url)
    ch = clickhouse_driver.Client(host=o.hostname)

    tool = ''
    if not internals:
        tool = "and tool != ''"

    query = "select count(*) from logs where job = %d %s" % (job_id, tool)
    resp = ch.execute(query)
    total = resp[0][0]

    if order is None:
        order = 'asc'

    query = "select time,message,service,host,level,job,tool,step from logs where job = %d %s order by time %s, seq %s limit %d, %d"
    query %= (job_id, tool, order, order, start, limit)
    log.info(query)
    rows = ch.execute(query)

    logs = []
    for r in rows:
        entry = dict(time=r[0], message=r[1], service=r[2], host=r[3],
                     level=r[4].lower()[:4], job=r[5], tool=r[6], step=r[7])
        logs.append(entry)

    return {'items': logs, 'total': total, 'job': job_json}, 200

def get_click_house():
    ch_host = '10.121.221.19'
    ch_port = '9000'
    ch_db = 'ecrlab'
    ch_user = '******'
    ch_pwd = ''
    client = clickhouse_driver.Client(host=ch_host, port=ch_port,
                                      database=ch_db, user=ch_user, password=ch_pwd)
    create_sql = '''CREATE TABLE if not exists ecrlab.lure2_result (
        `ProductName` Nullable(String),
        `lscRuleCategory` Nullable(String),
        `dateOfLastFiring` Nullable(String),
        `lscRuleMessage` Nullable(String),
        `messageLink` Nullable(String),
        `isClassified` Int32,
        `lscRuleRequestNumber` Nullable(String),
        `lscSeverity` Nullable(String),
        `tips` Nullable(String),
        `solution` Nullable(String),
        `date` Date)
    ENGINE = MergeTree()
    PARTITION BY date
    ORDER BY (date)
    SETTINGS index_granularity = 8192;'''
    client.execute(create_sql)
    return client

def _check_for_errors_in_logs():
    ch_url = os.environ.get('KRAKEN_CLICKHOUSE_URL', consts.DEFAULT_CLICKHOUSE_URL)
    o = urlparse(ch_url)
    ch = clickhouse_driver.Client(host=o.hostname)

    now = utils.utcnow()
    start_date = now - datetime.timedelta(hours=1)
    query = "select count(*) from logs where level = 'ERROR' and time > %(start_date)s;"
    rows = ch.execute(query, {'start_date': start_date})
    errors_count = rows[0][0]

    redis_addr = os.environ.get('KRAKEN_REDIS_ADDR', consts.DEFAULT_REDIS_ADDR)
    rds = redis.Redis(host=redis_addr, port=6379, db=consts.REDIS_KRAKEN_DB)
    rds.set('error-logs-count', errors_count)

def clickhouse(schema, tables, data_directory, **params):
    import clickhouse_driver

    logger.info('Initializing ClickHouse...')
    database = params.pop("database")
    client = clickhouse_driver.Client(**params)

    client.execute(f"DROP DATABASE IF EXISTS {database}")
    client.execute(f"CREATE DATABASE {database}")
    client.execute(f"USE {database}")

    for stmt in filter(None, map(str.strip, schema.read().split(';'))):
        client.execute(stmt)

    for table, df in read_tables(tables, data_directory):
        query = f"INSERT INTO {table} VALUES"
        client.insert_dataframe(query, df, settings={"use_numpy": True})

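# Hedged sketch (not from the original sources): insert_dataframe(), used
# above, needs clickhouse_driver's NumPy support; one way to enable it is the
# use_numpy client setting shown here. Table name and data are assumptions for
# illustration only.
import clickhouse_driver
import pandas as pd

demo = clickhouse_driver.Client(host='localhost', settings={'use_numpy': True})
demo.execute('CREATE TABLE IF NOT EXISTS df_demo (id Int64, name String) ENGINE = Memory')
demo.insert_dataframe('INSERT INTO df_demo VALUES',
                      pd.DataFrame({'id': [1, 2], 'name': ['a', 'b']}))
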
def get_ch_column_type(db, table, conf):
    column_type_dic = {}
    try:
        client = clickhouse_driver.Client(host=conf['clickhouse_server']['host'],
                                          port=conf['clickhouse_server']['port'],
                                          user=conf['clickhouse_server']['user'],
                                          password=conf['clickhouse_server']['passwd'])
        sql = "select name,type from system.columns where database='{0}' and table='{1}'".format('warehouse', 's_' + table)
        for d in client.execute(sql):
            if colum_lower_upper:
                column_type_dic[d[0].upper()] = d[1]
            else:
                column_type_dic[d[0].lower()] = d[1]
        return column_type_dic
    except Exception as error:
        message = "Failed to get column types from ClickHouse. %s" % (error)
        logger.error(message)
        exit(1)

def get_database(tag):
    '''
    get the latest database with the given tag
    uses the default tag if None is passed
    '''
    if tag is None:
        tag = treemap_config.DEFAULT_TAG
    qry = '''
        select name
        from system.databases
        where name like '{}_%'
        order by name desc
        limit 1
    '''
    qry = qry.format(tag)
    click = clickhouse_driver.Client(treemap_config.CLICK_HOST, compression=True)
    return click.execute(qry)[0][0]

def __init__(self, featuredata, feature_name=None, stock_data=None, returnday=5,
             host=clickhouse_ip, port=clickhouse_port,
             user=clickhouse_user, password=clickhouse_password) -> None:
    self.feature = featuredata
    self.featurename = featuredata.name if feature_name is None else feature_name
    self.feature.name = self.featurename
    self.codelist = self.feature.index.levels[1].unique().tolist()
    self.start = str(self.feature.index.levels[0][0])[0:10]
    self.end = str(self.feature.index.levels[0][-1])[0:10]
    self._host = host
    self._port = port
    self._user = user
    self._password = password
    self.factorclient = clickhouse_driver.Client(host=self._host, port=self._port,
                                                 user=self._user, password=self._password,
                                                 database='factor')
    self.dataclient = QACKClient(host=self._host, port=self._port,
                                 user=self._user, password=self._password)
    if stock_data is None:
        self.stock_data = self.dataclient.get_stock_day_qfq_adv(
            self.codelist, self.start, QA_util_get_next_day(self.end, returnday))
    else:
        self.stock_data = stock_data
    self.returns = self.make_ret(self.stock_data.data).dropna()

def get_last_rq_jobs_names():
    # get the last RQ jobs
    ch_url = os.environ.get('KRAKEN_CLICKHOUSE_URL', consts.DEFAULT_CLICKHOUSE_URL)
    o = urlparse(ch_url)
    ch = clickhouse_driver.Client(host=o.hostname)

    now = utils.utcnow()
    start_date = now - datetime.timedelta(hours=12111)

    query = "select max(time) as mt, tool, count(*) from logs "
    query += "where service = 'rq' and tool != '' "
    query += "group by tool "
    query += "having mt > %(start_date)s "
    query += "order by mt desc "
    query += "limit 100;"
    resp = ch.execute(query, {'start_date': start_date})

    task_names = []
    for row in resp:
        task_names.append(dict(time=row[0], name=row[1], lines=row[2]))

    return {'items': task_names}, 200

def get_databases(tag):
    '''
    returns the set of available databases
    only shows those for the given tag if one is passed
    '''
    click = clickhouse_driver.Client(treemap_config.CLICK_HOST, compression=True)
    qry = 'select name from system.databases'
    if tag is not None:
        qry += " where name like '{}%'".format(tag)
    rows = click.execute(qry)
    databases = set()
    for row in rows:
        databases.add(row[0])
    try:
        databases.remove('default')
        databases.remove('system')
        databases.remove('_temporary_and_external_tables')
    except KeyError:
        pass
    return databases

def data_to_ck(event, alarm_info, alarm_mail, debug, skip_dmls_all, skip_delete_tb_name,
               skip_update_tb_name, pk_dict, only_schemas, **kwargs):
    client = clickhouse_driver.Client(host=kwargs['host'], port=kwargs['port'],
                                      user=kwargs['user'], password=kwargs['passwd'])

    # Check whether any mutations have failed (ClickHouse runs update/delete as
    # asynchronous background mutations).
    mutation_list = ['mutation_faild', 'table', 'create_time']
    fail_list = []
    mutation_data = []
    if len(only_schemas) == 1:
        query_sql = "select count(*) from system.mutations where is_done=0 and database in %s" % (
            str(tuple(only_schemas)))
        # str(tuple(...)) of a single schema yields "('db',)": strip the trailing comma for valid SQL
        query_sql = query_sql.replace(",", '')
    else:
        query_sql = "select count(*) from system.mutations where is_done=0 and database in %s" % (
            str(tuple(only_schemas)))
    mutation_sql = "select count(*) as mutation_faild ,concat(database,'.',table)as db,create_time from system.mutations where is_done=0 and database in %s group by db,create_time" % (
        str(tuple(only_schemas)))
    mutations_faild_num = client.execute(query_sql)[0][0]
    if mutations_faild_num >= 10:
        fail_data = client.execute(mutation_sql)
        for d in fail_data:
            fail_list.append(list(d))
        for d in fail_list:
            tmp = dict(zip(mutation_list, d))
            mutation_data.append(tmp)
        last_data = json.dumps(mutation_data, indent=4, cls=DateEncoder)
        message = "mutations failed count {0}. Some deletes failed, please check. Details: {1}".format(
            mutations_faild_num, last_data)
        logger.error(message)
        send_mail(alarm_mail, alarm_info, message)

    # Normalize column-name case
    for data in event:
        for items in data:
            # iterate over a copy since keys are re-inserted with a different case
            for key, value in list(items['values'].items()):
                if colum_lower_upper:
                    items['values'][key.upper()] = items['values'].pop(key)
                else:
                    items['values'][key.lower()] = items['values'].pop(key)

    # Handle the case where the same record is updated multiple times
    new_data = []
    for tmp_data in event:
        table = tmp_data[0]['table']
        schema = tmp_data[0]['schema']
        if tmp_data[0]['action'] == 'insert':
            new_data.append(keep_new_update(tmp_data, schema, table, pk_dict))
        else:
            new_data.append(tmp_data)

    tmp_data_dic = {}
    event_table = []
    for data in new_data:
        name = '{}.{}.{}'.format(data[0]['schema'], data[0]['table'], data[0]['action'])
        tmp_data_dic[name] = data
        event_table.append(name)
    event_table = list(set(event_table))
    del_ins = action_reverse(event_table)

    # Drop redundant inserts and produce the final [[], []] format
    for table_action in del_ins:
        del_insert_record(table_action, tmp_data_dic, pk_dict)

    # Build the final processed data
    last_data = []
    for k, v in tmp_data_dic.items():
        if len(v) != 0:
            last_data.append(v)

    # Sort by action so deletes are executed before inserts
    tmp_dict = {}
    i = 0
    for d in last_data:
        tmp_dict[str(i)] = d[0]['action_core']
        i = i + 1
    sort_list = sorted(tmp_dict.items(), key=lambda x: x[1])
    new_event = []
    for i in sort_list:
        index = int(i[0])
        new_event.append(last_data[index])

    # Finally write the processed data to ClickHouse
    for tmp_data in new_event:
        if tmp_data[0]['action'] == 'delete':
            table = tmp_data[0]['table']
            schema = tmp_data[0]['schema']
            skip_dml_table_name = "{0}.{1}".format(schema, table)
            del_sql = event_primary_key(schema, table, tmp_data, pk_dict)
            if debug:
                message = "DELETE SQL: %s" % (del_sql)
                logger.info(message)
            try:
                if 'delete' in skip_dmls_all:
                    return True
                elif skip_dml_table_name in skip_delete_tb_name:
                    return True
                else:
                    client.execute(del_sql)
            except Exception as error:
                message = "SQL execution failed: " + del_sql
                mail_contex = "{0} {1}".format(message, error)
                send_mail(alarm_mail, alarm_info, mail_contex)
                logger.error(message)
                logger.error(error)
                return False
        elif tmp_data[0]['action'] == 'insert':
            sql = insert_update(tmp_data, pk_dict)
            try:
                # delete any existing rows for this primary key before re-inserting
                if client.execute(sql['query_sql'])[0][0] >= 1:
                    client.execute(sql['del_sql'])
            except Exception as error:
                message = "Failed to delete existing rows before insert, SQL: " + sql['del_sql']
                mail_contex = "{0} {1}".format(message, error)
                send_mail(alarm_mail, alarm_info, mail_contex)
                logger.error(message)
                logger.error(error)
                return False
            if debug:
                message = "INSERT SQL: %s %s " % (sql['insert_sql'], str(sql['insert_data']))
                logger.info(message)
            try:
                client.execute(sql['insert_sql'], sql['insert_data'], types_check=True)
            except Exception as error:
                message = "Insert failed, SQL: " + sql['insert_sql'] + str(sql['insert_data'])
                mail_contex = "{0} {1}".format(message, error)
                send_mail(alarm_mail, alarm_info, mail_contex)
                logger.error(message)
                logger.error(error)
                return False

    del new_event
    del tmp_dict
    del last_data
    del tmp_data_dic
    del new_data
    gc.collect()
    return True

if main_metric_element is not None and main_metric_element.tag != 'min_time':
    raise Exception(
        'Only the min_time main metric is supported. This test uses \'{}\''.format(
            main_metric_element.tag))

# FIXME another way to detect infinite tests. They should have an appropriate
# main_metric but sometimes they don't.
infinite_sign = root.find('.//average_speed_not_changing_for_ms')
if infinite_sign is not None:
    raise Exception('Looks like the test is infinite (sign 1)')

# Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
connections = [clickhouse_driver.Client(**server) for server in servers]

report_stage_end('connect')

# Apply settings
settings = root.findall('settings/*')
for c in connections:
    for s in settings:
        c.execute("set {} = '{}'".format(s.tag, s.text))

# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
    for c in connections:
        try:
            res = c.execute("show create table {}".format(t))

    'SALE_MNY': Decimal('10.000000'),
    'SALE_MNY_NOTAX': Decimal('8.850000'),
    'PAY_DIBS': Decimal('0.000000'),
    'CASH_MNY': Decimal('10.000000'),
    'PAY_ZERO': Decimal('0.000000'),
    'GROSS_PROFIT': Decimal('1.200000'),
    'TRADE_INFO': '现金 ¥10.00',
    'IS_CALC_INTEGRAL': None,
    'INTEGRAL': 0,
    'BARTER_INTEGRAL': 0,
    'REMARK': None,
    'INVOICE_FLAG': None,
    'TAX_RATE': None,
    'CREA_DATE': '20210304140337',
    'CREA_USER': '******',
    'GUIDE_USER': '******',
    'PLAN_DATE': '20210304',
    'LINKMAN': None,
    'TELEPHONE': None,
    'SEND_ADDR': None,
    'COMM': '00',
    'TIME_STAMP': 1614838011
}]

client = clickhouse_driver.Client(host='10.0.116.191', port='9000',
                                  database='mrtdb', user='******', password='******')
client.execute(sql['insert_sql'], insert_data, types_check=True)
print(datetime.datetime(1970, 1, 2, 14, 1))

def __init__(self, host, port, user, password):
    self._client = clickhouse_driver.Client(host=host, port=port,
                                            user=user, password=password)

# Print report threshold for the test if it is set.
ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib:
    ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
    print(f'report-threshold\t{ignored_relative_change}')

reportStageEnd('before-connect')

# Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]}
           for (host, port) in itertools.zip_longest(args.host, args.port)]
# Force settings_is_important to fail queries on unknown settings.
all_connections = [
    clickhouse_driver.Client(**server, settings_is_important=True)
    for server in servers
]

for i, s in enumerate(servers):
    print(f'server\t{i}\t{s["host"]}\t{s["port"]}')

reportStageEnd('connect')

if not args.use_existing_tables:
    # Run drop queries, ignoring errors. Do this before all other activity,
    # because clickhouse_driver disconnects on error (this is not configurable),
    # and the new connection loses the changes in settings.
    drop_query_templates = [q.text for q in root.findall('drop_query')]
    drop_queries = substitute_parameters(drop_query_templates)
    for conn_index, c in enumerate(all_connections):