def run(self, execution):
    """Consume MySQL binlog entries from the canal server forever.

    execution: callback invoked once per changed row; it receives a dict
        of the form ``{'db': ..., 'table': ..., 'event_type': ..., 'data': ...}``
        where ``data`` holds the before-image (DELETE), after-image (INSERT),
        or ``{'before': {...}, 'after': {...}}`` for other event types.
    Never returns; sleeps ``self.interval`` seconds between polls.
    """
    client = Client()
    client.connect(host=self.ip)
    client.check_valid()
    client.subscribe()
    # Transaction begin/end markers carry no row data, so they are skipped.
    skip_types = (
        EntryProtocol_pb2.EntryType.TRANSACTIONBEGIN,
        EntryProtocol_pb2.EntryType.TRANSACTIONEND,
    )
    while True:
        for entry in client.get(100)['entries']:
            if entry.entryType in skip_types:
                continue
            row_changes = EntryProtocol_pb2.RowChange()
            row_changes.MergeFromString(entry.storeValue)
            event_type = row_changes.eventType
            header = entry.header
            for row_data in row_changes.rowDatas:
                if event_type == EntryProtocol_pb2.EventType.DELETE:
                    payload = {c.name: c.value for c in row_data.beforeColumns}
                elif event_type == EntryProtocol_pb2.EventType.INSERT:
                    payload = {c.name: c.value for c in row_data.afterColumns}
                else:
                    payload = {
                        'before': {c.name: c.value for c in row_data.beforeColumns},
                        'after': {c.name: c.value for c in row_data.afterColumns},
                    }
                execution(dict(
                    db=header.schemaName,
                    table=header.tableName,
                    event_type=event_type,
                    data=payload,
                ))
        sleep(self.interval)
def create_canal_message(kafka_message):
    """Decode a raw canal packet consumed from Kafka into a message dict.

    kafka_message: serialized ``CanalProtocol_pb2.Packet`` bytes.
    Returns a dict shaped like ``{'id': 0, 'entries': [Entry, ...]}``,
    matching what the canal client's ``get()`` produces, so downstream
    code can treat both sources uniformly.
    """
    packet = CanalProtocol_pb2.Packet()
    packet.MergeFromString(kafka_message)
    message = dict(id=0, entries=[])
    # The payload always originates from canal writing to Kafka, so
    # packet.type is expected to be PacketType.MESSAGES; the explicit
    # type check was intentionally dropped for that reason.
    messages = CanalProtocol_pb2.Messages()
    messages.MergeFromString(packet.body)
    for item in messages.messages:
        entry = EntryProtocol_pb2.Entry()
        entry.MergeFromString(item)
        message['entries'].append(entry)
    return message
def get_message(self):
    """Poll the canal server (or Kafka-backed client) forever and dispatch row changes.

    Each non-transaction entry is flattened into a dict:
    DELETE -> before-image columns, INSERT -> after-image columns,
    anything else (e.g. UPDATE) -> ``{'before': {...}, 'after': {...}}``.
    The resulting record is handed to ``self.notify`` via ``self.executor``.
    Never returns; sleeps ``self.sleep_time`` seconds between polls.
    """
    while True:
        try:
            message = self.client.get(100)
            entries = message['entries']
            for entry in entries:
                entry_type = entry.entryType
                # Transaction begin/end markers carry no row data.
                if entry_type in [EntryProtocol_pb2.EntryType.TRANSACTIONBEGIN,
                                  EntryProtocol_pb2.EntryType.TRANSACTIONEND]:
                    continue
                row_change = EntryProtocol_pb2.RowChange()
                row_change.MergeFromString(entry.storeValue)
                header = entry.header
                database = header.schemaName
                table = header.tableName
                event_type = header.eventType
                # NOTE: format_data is shared across all rows of this entry,
                # so a later row overwrites keys written by an earlier one
                # (pre-existing behavior, preserved here).
                format_data = dict()
                for row in row_change.rowDatas:
                    if event_type == EntryProtocol_pb2.EventType.DELETE:
                        for column in row.beforeColumns:
                            format_data[column.name] = column.value
                    elif event_type == EntryProtocol_pb2.EventType.INSERT:
                        for column in row.afterColumns:
                            format_data[column.name] = column.value
                    else:
                        format_data['before'] = dict()
                        format_data['after'] = dict()
                        for column in row.beforeColumns:
                            format_data['before'][column.name] = column.value
                        for column in row.afterColumns:
                            format_data['after'][column.name] = column.value
                data = dict(
                    db=database,
                    table=table,
                    event_type=event_type,
                    data=format_data,
                )
                LOGGER.info(f"receive canal server message: {data}")
                self.executor.submit(self.notify, data)
            time.sleep(self.sleep_time)
        except Exception:
            # Was a bare ``except:`` that silently swallowed every error
            # (including SystemExit/KeyboardInterrupt) with no trace.
            # Log the failure, then drop the connection; the outer loop
            # retries on the next pass.
            LOGGER.exception("canal polling failed; disconnecting client")
            self.client.disconnect()
except: # 发生错误时回滚 db.rollback() while True: message = client.get(100) entries = message['entries'] for entry in entries: entry_type = entry.entryType if entry_type in [ EntryProtocol_pb2.EntryType.TRANSACTIONBEGIN, EntryProtocol_pb2.EntryType.TRANSACTIONEND ]: continue row_change = EntryProtocol_pb2.RowChange() row_change.MergeFromString(entry.storeValue) event_type = row_change.eventType header = entry.header database = header.schemaName table = header.tableName event_type = header.eventType for row in row_change.rowDatas: format_data = dict() if event_type == EntryProtocol_pb2.EventType.DELETE: # print(row.beforeColumns) for column in row.beforeColumns: if column.name == "id": format_data = {column.name: column.value} id = format_data["id"] deleteRow(table, id)
def run_forever(self, client, producer):
    """Pump binlog entries from the canal client into Kafka topics forever.

    client: canal client exposing ``get(batch_size)``.
    producer: Kafka producer exposing ``send(topic, value=...)``.
    Never returns; sleeps WAIT_TIMES seconds when a poll comes back empty
    and periodically prints per-topic send counts.
    """
    from canal.protocol import EntryProtocol_pb2
    from canal.protocol.EntryProtocol_pb2 import EntryType
    print(datetime.now(), " start running")
    # Per-topic send counters since the last periodic report.
    topics = defaultdict(int)
    sleep_times, send_times = 0, 0
    # Offset added to executeTime to disambiguate events sharing the same
    # timestamp. The name says "million" but the cap is 999 — presumably
    # milliseconds; TODO confirm the units of header.executeTime.
    last_execute_time, add_million_seconds = -1, 0
    while True:
        message = client.get(100)
        entries = message['entries']
        for entry in entries:
            entry_type = entry.entryType
            # Transaction begin/end markers carry no row data.
            if entry_type in (EntryType.TRANSACTIONBEGIN, EntryType.TRANSACTIONEND):
                continue
            row_change = EntryProtocol_pb2.RowChange()
            row_change.MergeFromString(entry.storeValue)
            header = entry.header
            database = header.schemaName
            table = header.tableName
            event_type = header.eventType
            # try add million second when meet same execute time
            fix_execute_time = header.executeTime
            if last_execute_time == header.executeTime:
                if add_million_seconds < 999:
                    add_million_seconds += 1
                    fix_execute_time = header.executeTime + add_million_seconds
                else:
                    # More than 1000 events at the same executeTime: the
                    # offset saturates at 999, so adjusted times may repeat.
                    print('over 1000 event in one seconds')
                    fix_execute_time = header.executeTime + add_million_seconds
            else:
                last_execute_time = header.executeTime
                add_million_seconds = 0
            row_time = self._convert_utc_time(fix_execute_time)
            logging.debug(' '.join(
                str(x) for x in [
                    row_time, fix_execute_time, header.executeTime,
                    header.logfileOffset, add_million_seconds
                ]))
            for row in row_change.rowDatas:
                msg = self._generate_notice(event_type, row, row_time)
                event_type_name = self.SUPPORT_TYPE[event_type]
                topic_name = self._generate_topic_name(
                    database, table, event_type_name)
                # Skip rows without a mapped topic or with an event type the
                # current mode does not support.
                if topic_name is None or not self._mode.is_support(
                        event_type_name):
                    print('filter {} {} {} - topic{}'.format(
                        database, table, event_type_name, topic_name))
                    continue
                if topic_name not in topics:
                    print(topic_name, 'get')
                topics[topic_name] += 1
                producer.send(topic_name, value=msg)
                send_times += 1
        if not entries:
            # Empty poll: back off, and every PRINT_EACH sleeps report and
            # reset the per-topic counters.
            time.sleep(WAIT_TIMES)
            if sleep_times % PRINT_EACH == 0:
                for t, n in topics.items():
                    print(' Topic: ', t, ' send ', n)
                topics = defaultdict(int)
                # NOTE(review): the source extraction broke this literal
                # across physical lines; assuming the original used an
                # embedded "\n" escape — confirm against the real file.
                logger.debug("{} wait for in {} \nalready send {}".format(
                    datetime.now(), sleep_times, send_times))
            sleep_times += 1