def main():
    global limit_step
    global complex_messages

    # configs
    config = LoadConfigs('config.ini')
    template_paths = GetConfigParameter(config, 'Templates', 'paths').split(',')
    filemaxlines = int(GetConfigParameter(config, 'OutputFile', 'filemaxlines'))
    delimiter = GetConfigParameter(config, 'OutputFile', 'delimiter')
    quotechar = GetConfigParameter(config, 'OutputFile', 'quotechar')
    outputdir = GetConfigParameter(config, 'OutputFile', 'outputdir')
    analitics_path = outputdir + '/' + GetConfigParameter(config, 'Analitics', 'filename')
    metrics = GetConfigParameter(config, 'Analitics', 'metrics').split(',')  # add check if it is in query
    limit_step = int(GetConfigParameter(config, 'ClickHouse', 'limit_step'))
    clickhouse_host = GetConfigParameter(config, 'ClickHouse', 'host')
    colums = GetConfigParameter(config, 'ClickHouse', 'colums').split(',')

    # date range is taken from the command line: <date_start> <date_end>
    date_start = sys.argv[1]
    date_end = sys.argv[2]

    complex_messages = []
    total_count = limit_step * 2

    GetTemplatesFromCSV(template_paths)

    client = ClickHouseClient(clickhouse_host, on_progress=OnProgress)
    query_format = GenerateQueryFormat(config, colums)
    filename = GenerateFileName(outputdir, date_start, date_end)
    csvwriter = CSVWriter(filemaxlines, filename, delimiter, quotechar, u'\ufeff',
                          colums + ['FullText', 'MessageType', 'Template'])
    analitics = Analitics(analitics_path, metrics)

    for single_date in DateRange(date_start, date_end):
        date_str = single_date.strftime("%Y-%m-%d")
        ColoredPrint('\n[Determiner]', 'Working on date(' + date_str + ')', bcolors.OKGREEN, bcolors.WARNING)

        # page through the day's rows, limit_step rows per query
        limit_count = 0
        msg_count = 0
        while limit_count < total_count:
            query = query_format.format(date_str, limit_count, limit_step)
            result = client.select(query, on_progress=OnProgress, send_progress_in_http_headers=1)
            print()

            data_len = len(result.data)
            total_count = limit_count + data_len + 1

            counter = Counter(data_len, 0.2)
            for v in result.data:
                message = Message(colums, v)
                id, c_message = ToComplexMessage(message)
                if c_message.isFullFiled():
                    operator_id = message.get('OperatorGroupId')
                    originator = message.get('OutgoingOriginator')
                    c_message.determineTemplate(GetTemplates(operator_id, originator))
                    c_message.writePartsToCsvFile(csvwriter)
                    analitics.addData(date_str[:-3], c_message.getParameters(metrics),
                                      c_message.getType(), c_message.getCount())
                    complex_messages.pop(id)
                    msg_count += 1
                counter.step(bcolors.OKGREEN + '[' + date_str + '] ' + bcolors.ENDC + str(msg_count) + ' messages handled')
            counter.lastTell(bcolors.OKGREEN + '[' + date_str + '] ' + bcolors.ENDC + str(msg_count) + ' messages handled')

            del result
            limit_count += limit_step

    # flush messages that never became complete, close the CSV output and save the analytics summary
    WriteDownAllRemainingData(csvwriter, analitics, metrics)
    csvwriter.close()
    SaveAnalitics(analitics, delimiter, quotechar)
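# A minimal entry point sketch for the script above. The file name in the usage
# comment is hypothetical; main() only assumes the start and end dates arrive as
# the two positional command-line arguments it reads via sys.argv.
if __name__ == '__main__':
    # usage (hypothetical file name): python determiner.py 2018-01-01 2018-01-31
    main()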
import tempfile

from pandas import DataFrame

# ClickHouseServer and ClickHouseClient are the companion test helpers this class
# relies on; their import path depends on how the helper package is laid out.


class ClickHouseTable:
    def __init__(self, server, port, table_name, df):
        self.server = server
        self.port = port
        self.table_name = table_name
        self.df = df

        if not isinstance(self.server, ClickHouseServer):
            raise Exception('Expected ClickHouseServer, got ' + repr(self.server))
        if not isinstance(self.df, DataFrame):
            raise Exception('Expected DataFrame, got ' + repr(self.df))

        self.server.wait_for_request(port)
        self.client = ClickHouseClient(server.binary_path, port)

    def _convert(self, name):
        # map pandas/numpy dtypes to ClickHouse column types, defaulting to String
        types_map = {
            'float64': 'Float64',
            'int64': 'Int64',
            'float32': 'Float32',
            'int32': 'Int32'
        }
        if name in types_map:
            return types_map[name]
        return 'String'

    def _create_table_from_df(self):
        self.client.query('create database if not exists test')
        self.client.query('drop table if exists test.{}'.format(self.table_name))

        column_types = list(self.df.dtypes)
        column_names = list(self.df)
        schema = ', '.join((name + ' ' + self._convert(str(t)) for name, t in zip(column_names, column_types)))
        print('schema:', schema)

        create_query = 'create table test.{} (date Date DEFAULT today(), {}) engine = MergeTree(date, (date), 8192)'
        self.client.query(create_query.format(self.table_name, schema))

        insert_query = 'insert into test.{} ({}) format CSV'

        # stream the DataFrame into the table as CSV through a temporary file
        with tempfile.TemporaryFile() as tmp_file:
            self.df.to_csv(tmp_file, header=False, index=False)
            tmp_file.seek(0)
            self.client.query(insert_query.format(self.table_name, ', '.join(column_names)), pipe=tmp_file)

    def apply_model(self, model_name, float_columns, cat_columns):
        columns = ', '.join(list(float_columns) + list(cat_columns))
        query = "select modelEvaluate('{}', {}) from test.{} format TSV"
        result = self.client.query(query.format(model_name, columns, self.table_name))
        return tuple(map(float, filter(len, map(str.strip, result.split()))))

    def _drop_table(self):
        self.client.query('drop table test.{}'.format(self.table_name))

    def __enter__(self):
        self._create_table_from_df()
        return self

    def __exit__(self, type, value, traceback):
        self._drop_table()
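# A minimal usage sketch of ClickHouseTable as a context manager. Everything here
# is illustrative rather than part of the class: `server` stands for an already
# started ClickHouseServer helper, port 9000, the column names, and the model name
# 'my_model' (which must already be known to the server via modelEvaluate) are
# placeholders.
df = DataFrame({'f0': [0.1, 0.2], 'f1': [1.0, 2.0], 'cat0': ['a', 'b']})

with ClickHouseTable(server, 9000, 'model_input', df) as table:
    # the table is created from df on __enter__ and dropped on __exit__
    predictions = table.apply_model('my_model', float_columns=['f0', 'f1'], cat_columns=['cat0'])
    print(predictions)  # flat tuple of floats parsed from the TSV result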
from client import ClickHouseClient
from errors import Error as ClickHouseError


def on_progress(total, read, progress):
    print(total, read, progress)


try:
    client = ClickHouseClient('http://ch00.fin.adfox.ru:8123/', on_progress=on_progress,
                              user='******', password='******')
    query = 'SELECT date FROM adfox.dist_elog WHERE date > toDate(0)'
    result = client.select(query, send_progress_in_http_headers=1)
    print(result.data)
except ClickHouseError as e:
    print(e)
except Exception as e:
    print(e)
from client import ClickHouseClient
from errors import Error as ClickHouseError

import logging

logging.basicConfig(level=logging.DEBUG)


def on_progress(total, read, progress):
    print(total, read, progress)


try:
    client = ClickHouseClient('http://example.com:8123/', on_progress=on_progress,
                              user='******', password='******')
    query = 'SELECT sum(load) as loads FROM dbname.table WHERE date = today()'
    result = client.select(query, send_progress_in_http_headers=1)
    print(result.data)
except ClickHouseError as e:
    print(e)
except Exception as e:
    print(e)
from client import ClickHouseClient
from errors import Error as ClickHouseError


def on_progress(total, read, progress):
    print(total, read, progress)


try:
    client = ClickHouseClient('clh.datalight.me:8123/', on_progress=on_progress,
                              user='******', password='******')
    query = 'SELECT 1'
    result = client.select(query, send_progress_in_http_headers=1)
    print(result.data)
except ClickHouseError as e:
    print(e)
except Exception as e:
    print(e)