def insert(self, event_or_events=None):
    # event_or_events = [
    #   event: {
    #       row: {'id': 3, 'a': 3}
    #   },
    #   event: {
    #       row: {'id': 3, 'a': 3}
    #   },
    # ]

    events = self.listify(event_or_events)
    if len(events) < 1:
        logging.warning('No events to insert. class: %s', __class__)
        return

    # assume we have at least one Event
    logging.debug('class:%s insert %d rows', __class__, len(events))

    for event in events:
        # determine target schema.table
        schema = self.dst_schema if self.dst_schema else event.schema
        table = None
        if self.dst_distribute:
            table = TableProcessor.create_distributed_table_name(db=event.schema, table=event.table)
        else:
            table = self.dst_table if self.dst_table else event.table
            if self.dst_schema:
                table = TableProcessor.create_migrated_table_name(prefix=self.dst_table_prefix, table=table)

        sql = 'INSERT INTO `{0}`.`{1}` ({2}) FORMAT CSV'.format(
            schema,
            table,
            ', '.join(map(lambda column: '`%s`' % column, event.fieldnames)),
        )

        # assemble clickhouse-client connection options
        choptions = ""
        if self.host:
            choptions += " --host=" + shlex.quote(self.host)
        if self.port:
            choptions += " --port=" + str(self.port)
        if self.user:
            choptions += " --user=" + shlex.quote(self.user)
        if self.password:
            choptions += " --password=" + shlex.quote(self.password)

        # tail -n +2 skips the CSV header line - the column list is passed explicitly in the query
        bash = "tail -n +2 '{0}' | clickhouse-client {1} --query='{2}'".format(
            event.filename,
            choptions,
            sql,
        )

        logging.info('starting clickhouse-client process')
        logging.debug('starting %s', bash)
        os.system(bash)
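# A minimal usage sketch, not part of the writer above: it shows the shell pipeline
# the method assembles for one hypothetical event (filename '/tmp/batch.csv',
# fieldnames ['id', 'a'], target db1.t1, host/user values made up). The command is
# returned rather than passed to os.system().
import shlex

def example_csv_insert_command():
    fieldnames = ['id', 'a']
    sql = 'INSERT INTO `{0}`.`{1}` ({2}) FORMAT CSV'.format(
        'db1',
        't1',
        ', '.join('`%s`' % column for column in fieldnames),
    )
    choptions = " --host=" + shlex.quote('127.0.0.1') + " --user=" + shlex.quote('default')
    # tail -n +2 drops the CSV header row; the column order is already given in the query
    return "tail -n +2 '{0}' | clickhouse-client {1} --query='{2}'".format(
        '/tmp/batch.csv',
        choptions,
        sql,
    )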
def insert(self, event_or_events=None):
    # event_or_events = [
    #   event: {
    #       row: {'id': 3, 'a': 3}
    #   },
    #   event: {
    #       row: {'id': 3, 'a': 3}
    #   },
    # ]

    events = self.listify(event_or_events)
    if len(events) < 1:
        logging.warning('No events to insert. class: %s', __class__)
        return

    # assume we have at least one Event
    logging.debug('class:%s insert %d event(s)', __class__, len(events))

    # verify and convert events, then consolidate converted rows from all events into one batch
    rows = []
    event_converted = None
    for event in events:
        if not event.verify:
            logging.warning('Event verification failed. Skip one event. Event: %s Class: %s', event.meta(), __class__)
            continue  # for event

        event_converted = self.convert(event)
        for row in event_converted:
            for key in row.keys():
                # convert Decimal, timedelta and bytes values into representations suitable for the table structure
                if type(row[key]) == Decimal:
                    row[key] = str(row[key])
                elif type(row[key]) == datetime.timedelta:
                    row[key] = str(row[key])
                elif type(row[key]) == bytes:
                    row[key] = '0x' + str.upper(row[key].hex())
            rows.append(row)

    logging.debug('class:%s insert %d row(s)', __class__, len(rows))

    # determine target schema.table
    schema = self.dst_schema if self.dst_schema else event_converted.schema
    table = None
    if self.dst_distribute:
        table = TableProcessor.create_distributed_table_name(db=event_converted.schema, table=event_converted.table)
    else:
        table = self.dst_table if self.dst_table else event_converted.table
        if self.dst_schema:
            table = TableProcessor.create_migrated_table_name(prefix=self.dst_table_prefix, table=table)

    logging.debug("schema={} table={} self.dst_schema={} self.dst_table={}".format(schema, table, self.dst_schema, self.dst_table))

    # and INSERT converted rows
    sql = ''
    try:
        sql = 'INSERT INTO `{0}`.`{1}` ({2}) VALUES'.format(
            schema,
            table,
            ', '.join(map(lambda column: '`%s`' % column, rows[0].keys())),
        )
        self.client.execute(sql, rows)
    except Exception as ex:
        logging.critical('QUERY FAILED')
        logging.critical('ex={}'.format(ex))
        logging.critical('sql={}'.format(sql))
        sys.exit(0)
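# A minimal standalone sketch of the per-row value conversion used above, applied to
# a made-up row. Column names and values are assumptions for illustration; it only
# demonstrates how Decimal, timedelta and bytes values end up in the INSERTed batch.
import datetime
from decimal import Decimal

def example_convert_row():
    row = {'price': Decimal('1.23'), 'elapsed': datetime.timedelta(seconds=90), 'digest': b'\xde\xad'}
    for key in row.keys():
        if type(row[key]) == Decimal:
            row[key] = str(row[key])
        elif type(row[key]) == datetime.timedelta:
            row[key] = str(row[key])
        elif type(row[key]) == bytes:
            row[key] = '0x' + str.upper(row[key].hex())
    return row  # {'price': '1.23', 'elapsed': '0:01:30', 'digest': '0xDEAD'}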
def __init__(
        self,
        host=None,
        port=None,
        user=None,
        password=None,
        dbs=None,
        schema=None,
        distribute=None,
        cluster=None,
        tables=None,
        tables_prefixes=None,
        tables_where_clauses=None,
        column_skip=[],
):
    super().__init__(
        host=host,
        port=port,
        user=user,
        password=password,
        dbs=dbs,
        schema=schema,
        distribute=distribute,
        cluster=cluster,
        tables=tables,
        tables_prefixes=tables_prefixes,
        column_skip=column_skip)
    self.client.cursorclass = SSDictCursor

    # parse tables where clauses
    if not tables_where_clauses:
        return

    # tables_where_clauses contains:
    # [
    #   'db1.t1=where_filename_1',
    #   'db2.t2=where_filename_2',
    # ]

    # debug info
    logging.info("tables_where_clauses={}".format(tables_where_clauses))
    for table_where in tables_where_clauses:
        logging.info("table_where={}".format(table_where))

    # process WHERE migration clauses
    for table_where_clause in tables_where_clauses:
        # table_where_clause contains 'db1.t1=where_filename_1'
        full_table_name, equals, where_file_name = table_where_clause.partition('=')

        # sanity check
        if not full_table_name or not equals or not where_file_name:
            continue
        if not TableProcessor.is_full_table_name(full_table_name):
            continue

        # prepare the following data structure:
        # {
        #   'db1': {
        #       'table1': "a = 1 and b = 2"
        #       'table2': "c = 1 and d = 2"
        #   },
        #   'db2': {
        #       'table1': "e = 2 and f = 3"
        #       'table2': "g = 1 and h = 2"
        #   }
        # }
        db, table = TableProcessor.parse_full_table_name(full_table_name)
        if db not in self.where_clauses:
            self.where_clauses[db] = {}
        # the value is either a file containing the WHERE clause or the clause itself
        if os.path.isfile(where_file_name):
            self.where_clauses[db][table] = open(where_file_name, 'r').read().strip("\n")
        else:
            self.where_clauses[db][table] = where_file_name

    # debug info
    logging.info("migration where clauses")
    for db, tables in self.where_clauses.items():
        for table, where in tables.items():
            logging.info("{}.{}.where={}".format(db, table, where))
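# A minimal sketch of the WHERE-clause parsing above, without the file lookup and the
# TableProcessor helpers. The input entries and table names are hypothetical; it only
# shows how 'db.table=<clause or filename>' strings become the nested dict.
def example_parse_where_clauses():
    where_clauses = {}
    for table_where_clause in ['db1.t1=id > 100', 'db2.t2=created_at >= "2020-01-01"']:
        full_table_name, equals, where = table_where_clause.partition('=')
        if not full_table_name or not equals or not where:
            continue
        db, table = full_table_name.split('.', 1)
        where_clauses.setdefault(db, {})[table] = where
    return where_clauses  # {'db1': {'t1': 'id > 100'}, 'db2': {'t2': 'created_at >= "2020-01-01"'}}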
def __init__(
        self,
        connection_settings,
        server_id,
        log_file=None,
        log_pos=None,
        schemas=None,
        tables=None,
        tables_prefixes=None,
        blocking=None,
        resume_stream=None,
        nice_pause=None,
        binlog_position_file=None,
        callbacks={},
):
    super().__init__(callbacks=callbacks)

    self.connection_settings = connection_settings
    self.server_id = server_id
    self.log_file = log_file
    self.log_pos = log_pos
    # normalise the dbs list; keep None if nothing was extracted
    dbs = TableProcessor.extract_dbs(schemas, Util.join_lists(tables, tables_prefixes))
    self.schemas = dbs if dbs else None
    self.tables = None if tables is None else TableProcessor.extract_tables(tables)
    self.tables_prefixes = None if tables_prefixes is None else TableProcessor.extract_tables(tables_prefixes)
    self.blocking = blocking
    self.resume_stream = resume_stream
    self.nice_pause = nice_pause
    self.binlog_position_file = binlog_position_file

    self.cache_pool = CachePool()
    self.cache_pool.reader = self

    logging.info("raw dbs list. len()=%d", 0 if schemas is None else len(schemas))
    if schemas is not None:
        for schema in schemas:
            logging.info(schema)
    logging.info("normalised dbs list. len()=%d", 0 if self.schemas is None else len(self.schemas))
    if self.schemas is not None:
        for schema in self.schemas:
            logging.info(schema)

    logging.info("raw tables list. len()=%d", 0 if tables is None else len(tables))
    if tables is not None:
        for table in tables:
            logging.info(table)
    logging.info("normalised tables list. len()=%d", 0 if self.tables is None else len(self.tables))
    if self.tables is not None:
        for table in self.tables:
            logging.info(table)

    logging.info("raw tables-prefixes list. len()=%d", 0 if tables_prefixes is None else len(tables_prefixes))
    if tables_prefixes is not None:
        for table in tables_prefixes:
            logging.info(table)
    logging.info("normalised tables-prefixes list. len()=%d", 0 if self.tables_prefixes is None else len(self.tables_prefixes))
    if self.tables_prefixes is not None:
        for table in self.tables_prefixes:
            logging.info(table)

    if not isinstance(self.server_id, int):
        raise Exception("Please specify server_id of src server as int. Ex.: --src-server-id=1")

    self.binlog_stream = BinLogStreamReader(
        # MySQL server - data source
        connection_settings=self.connection_settings,
        server_id=self.server_id,
        # we are interested in reading CH-repeatable events only
        only_events=[
            # Possible events
            # BeginLoadQueryEvent,
            DeleteRowsEvent,
            # ExecuteLoadQueryEvent,
            # FormatDescriptionEvent,
            # GtidEvent,
            # HeartbeatLogEvent,
            # IntvarEvent,
            # NotImplementedEvent,
            # QueryEvent,
            # RotateEvent,
            # StopEvent,
            # TableMapEvent,
            UpdateRowsEvent,
            WriteRowsEvent,
            # XidEvent,
        ],
        only_schemas=self.schemas,
        # in case we have any prefixes - this means we need to listen to all tables within specified schemas
        only_tables=self.tables if not self.tables_prefixes else None,
        log_file=self.log_file,
        log_pos=self.log_pos,
        freeze_schema=True,  # If True do not support ALTER TABLE. It's faster.
        blocking=False,
        resume_stream=self.resume_stream,
    )

    logging.debug("mysql connection settings:{}".format(self.connection_settings))
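# A hedged construction sketch: how a reader with the __init__ above might be created.
# The class name MySQLReader, the credentials and the schema/table names are
# assumptions for illustration; connection_settings follows the pymysql-style dict
# that pymysqlreplication's BinLogStreamReader accepts.
def example_create_reader():
    connection_settings = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'reader',
        'passwd': 'secret',
    }
    return MySQLReader(
        connection_settings=connection_settings,
        server_id=1,  # must be an int - see the isinstance() check above
        schemas=['db1'],
        tables=['db1.t1'],
        resume_stream=True,
        nice_pause=0.5,
        binlog_position_file='binlog.pos',
    )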