def process_write_rows_event(self, mysql_event):
    """
    Process specific MySQL event - WriteRowsEvent

    Dispatches the event to 'WriteRowsEvent' subscribers as a whole and to
    'WriteRowsEvent.EachRow' subscribers row-by-row, updating statistics
    counters along the way.

    :param mysql_event: WriteRowsEvent instance
    :return:
    """
    # with table prefixes configured, skip events from tables we do not listen to
    if self.tables_prefixes and not self.is_table_listened(mysql_event.table):
        return

    # statistics: track min/max rows-per-event
    self.stat_write_rows_event_calc_rows_num_min_max(
        rows_num_per_event=len(mysql_event.rows))

    if self.subscribers('WriteRowsEvent'):
        # whole-event subscribers: one Event carrying the raw replication event
        self.stat_write_rows_event_all_rows(mysql_event=mysql_event)

        whole_event = Event()
        whole_event.schema = mysql_event.schema
        whole_event.table = mysql_event.table
        whole_event.pymysqlreplication_event = mysql_event

        self.process_first_event(event=whole_event)
        self.notify('WriteRowsEvent', event=whole_event)

    if self.subscribers('WriteRowsEvent.EachRow'):
        # per-row subscribers: one Event per inserted row
        self.stat_write_rows_event_each_row()

        for row in mysql_event.rows:
            self.stat_write_rows_event_each_row_for_each_row()

            row_event = Event()
            row_event.schema = mysql_event.schema
            row_event.table = mysql_event.table
            row_event.row = row['values']

            self.process_first_event(event=row_event)
            self.notify('WriteRowsEvent.EachRow', event=row_event)

    self.stat_write_rows_event_finalyse()
def migrate_one_table_data(self, db=None, table=None):
    """
    Migrate one table

    Streams rows from MySQL in batches (SSDictCursor) and feeds them to the
    ClickHouse writer, saving an 'id'-based checkpoint after each batch so a
    later run can resume from where this one stopped.

    :param db: db
    :param table: table
    :return: number of migrated rows
    """
    # build SQL statement
    full_table_name = self.create_full_table_name(db=db, table=table)
    ret = self.get_columns(db, full_table_name)
    fs = ret[0]
    columns = ret[1]
    last_id = self.get_checkpoints(db, table)

    sql = "SELECT {0} FROM {1}".format(",".join(columns), full_table_name)

    # Collect all conditions and join them with AND.
    # The previous code appended a second bare "WHERE" when both a per-table
    # where-clause and a checkpoint applied, producing invalid SQL like
    # "... WHERE x WHERE id > 5". Each condition is parenthesized because a
    # user-supplied clause may contain OR.
    conditions = []
    if db in self.where_clauses and table in self.where_clauses[db]:
        conditions.append(self.where_clauses[db][table])
    if int(last_id) > 0:
        conditions.append("id > {}".format(last_id))
    if conditions:
        sql += " WHERE {}".format(" AND ".join("({})".format(c) for c in conditions))

    cnt = 0
    try:
        logging.info("migrate_table. sql={}".format(sql))
        self.client.cursorclass = SSDictCursor
        self.client.connect(db=db)
        self.client.cursor.execute(sql)
        while True:
            # fetch multiple rows from MySQL
            rows = self.client.cursor.fetchmany(self.pool_max_rows_num)
            if not rows:
                break

            # insert Event with multiple rows into ClickHouse writer
            event = Event()
            event.schema = db
            event.table = table
            event.rows = rows
            self.chwriter.insert(event, fs)
            self.chwriter.flush()

            # remember the last migrated id so the next run can resume
            last_row = rows[-1]
            if "id" in last_row:
                self.save_last_checkpoint(db, table, last_row["id"])
            cnt += len(rows)
    except Exception as ex:
        logging.critical("Critical error: {}".format(str(ex)))
        # chain the original exception so the root cause is not lost
        raise Exception("Can not migrate table on db={} table={}".format(
            db,
            table,
        )) from ex

    return cnt
def migrate_one_table_data(self, db=None, table=None):
    """
    Migrate one table

    Streams all rows (optionally filtered by a configured WHERE clause) from
    MySQL in batches and feeds them to the ClickHouse writer.

    :param db: db
    :param table: table
    :return: number of migrated rows
    """
    self.client.cursorclass = SSDictCursor
    self.client.connect(db=db)

    # build SQL statement
    sql = "SELECT * FROM {0}".format(
        self.create_full_table_name(db=db, table=table))

    # in case we have WHERE clause for this db.table - add it to SQL
    if db in self.where_clauses and table in self.where_clauses[db]:
        sql += " WHERE {}".format(self.where_clauses[db][table])

    # cnt must exist before the try block: previously it was first assigned
    # after execute(), so a failure in execute() raised NameError inside the
    # handler instead of reporting the real error
    cnt = 0
    try:
        logging.info("migrate_table. sql={}".format(sql))
        self.client.cursor.execute(sql)
        while True:
            # fetch multiple rows from MySQL
            rows = self.client.cursor.fetchmany(self.pool_max_rows_num)
            if not rows:
                break

            # insert Event with multiple rows into ClickHouse writer
            event = Event()
            event.schema = db
            event.table = table
            event.rows = rows
            self.chwriter.insert(event)
            self.chwriter.flush()
            cnt += len(rows)
    except Exception as ex:
        # narrow from a bare 'except:', chain the cause, and stop leaking the
        # password into exception text/logs
        raise Exception(
            "Can not migrate table on host={} user={} db={} table={} cnt={}"
            .format(self.host, self.user, db, table, cnt)) from ex

    return cnt
def push(self):
    """
    Hand the accumulated file over to the next writer in the chain as a
    single Event carrying the file path and its field names.
    """
    # nothing to push without a downstream writer builder or known field names
    if self.next_writer_builder and self.fieldnames:
        handoff = Event()
        handoff.schema = self.dst_schema
        handoff.table = self.dst_table
        handoff.filename = self.path
        handoff.fieldnames = self.fieldnames
        self.next_writer_builder.get().insert(handoff)
def flush(self, event):
    """
    Flush all buffered rows as one batched 'WriteRowsEvent' and record the
    binlog position reached.

    :param event: unused - the previous code immediately overwrote (shadowed)
                  this parameter; it is kept only for interface compatibility
                  with existing callers. The flushed data comes from
                  self.events.
    :return:
    """
    logging.info("start flush schema:{} table:{} rows:{}".format(
        self.schema, self.table, len(self.events)))

    # wrap the buffered rows so they look like a replication rows-event
    batch_event = FakeRows()
    batch_event.rows = self.events

    # build a fresh Event instead of clobbering the 'event' parameter
    out_event = Event()
    out_event.schema = self.schema
    out_event.table = self.table
    out_event.pymysqlreplication_event = batch_event
    out_event.fs = self.reader.get_field_schema_cache(self.schema, self.table)

    self.reader.process_first_event(event=out_event)
    self.reader.notify('WriteRowsEvent', event=out_event)

    # reset the buffer and remember when/where we flushed
    self.events = []
    self.last_flush_time = time.time()
    self.reader.process_binlog_position(self.last_binlog_file_name,
                                        self.last_binlog_file_pos)
# NOTE(review): the statements below are the tail of a method (presumably
# close()) whose 'def' line lies outside this chunk - they flush and release
# the underlying file handle and writer.
self.file.flush()
self.file.close()
self.file = None
self.writer = None

def destroy(self):
    """Close and delete the backing file, when this writer owns it."""
    # only remove the file if deletion was requested and the file exists
    if self.delete and os.path.isfile(self.path):
        self.close()
        os.remove(self.path)

# ad-hoc manual smoke test: write two rows into 'file.csv' via CSVWriter
if __name__ == '__main__':
    path = 'file.csv'
    writer = CSVWriter(path)
    writer.open()
    event = Event()
    event.row_converted = {
        'a': 123,
        'b': 456,
        'c': 'qwe',
        'd': 'rty',
    }
    writer.insert(event)
    event.row_converted = {
        'a': 789,
        'b': 987,
        'c': 'asd',
        'd': 'fgh',
    }
    writer.insert(event)
    writer.close()
def process_update_rows_event(self, mysql_event, file, pos):
    """
    Process specific MySQL event - UpdateRowsEvent

    Normalizes updated rows (exposes 'after_values' as 'values') and pushes
    the whole event, with its binlog position, into the cache pool.

    :param mysql_event: UpdateRowsEvent instance
    :param file: binlog file name this event came from
    :param pos: binlog position of this event
    :return:
    """
    if self.tables_prefixes:
        # we have prefixes specified
        # need to find whether current event is produced by table in 'looking-into-tables' list
        if not self.is_table_listened(mysql_event.table):
            # this table is not listened
            # processing is over - just skip event
            return

    # make updated rows look like inserted rows: consumers read row['values']
    for row in mysql_event.rows:
        row["values"] = row["after_values"]
    self.cache_pool.push_event(mysql_event.schema, mysql_event.table,
                               mysql_event, file, pos)
    return

    # NOTE(review): everything below is UNREACHABLE - the unconditional
    # 'return' above delegates all processing to the cache pool. This looks
    # like the pre-cache-pool direct-dispatch path kept for reference;
    # confirm intent and delete if no longer needed.

    # statistics
    self.stat_write_rows_event_calc_rows_num_min_max(
        rows_num_per_event=len(mysql_event.rows))
    fs = self.get_field_schema_cache(mysql_event.schema, mysql_event.table)

    if self.subscribers('UpdateRowsEvent'):
        logging.debug("start update")
        # dispatch event to subscribers

        # statistics
        self.stat_write_rows_event_all_rows(mysql_event=mysql_event)

        # dispatch Event
        event = Event()
        event.schema = mysql_event.schema
        event.table = mysql_event.table
        event.pymysqlreplication_event = mysql_event
        event.fs = fs
        self.process_first_event(event=event)
        self.notify('UpdateRowsEvent', event=event)

    if self.subscribers('UpdateRowsEvent.EachRow'):
        # dispatch event to subscribers

        # statistics
        self.stat_write_rows_event_each_row()

        # dispatch Event per each row
        for row in mysql_event.rows:
            # statistics
            self.stat_write_rows_event_each_row_for_each_row()

            # dispatch Event
            event = Event()
            event.schema = mysql_event.schema
            event.table = mysql_event.table
            event.row = row['values']
            event.before_row = row["before_values"]
            event.fs = fs
            self.process_first_event(event=event)
            self.notify('UpdateRowsEvent.EachRow', event=event)

    self.stat_write_rows_event_finalyse()