Example #1
0
    def process_write_rows_event(self, mysql_event):
        """
        Process specific MySQL event - WriteRowsEvent
        :param mysql_event: WriteRowsEvent instance
        :return:
        """
        # When table prefixes are configured, only events produced by a
        # table in the 'looking-into-tables' list are processed.
        if self.tables_prefixes and not self.is_table_listened(mysql_event.table):
            return

        # statistics: track min/max rows-per-event
        self.stat_write_rows_event_calc_rows_num_min_max(
            rows_num_per_event=len(mysql_event.rows))

        # whole-event subscribers get one Event wrapping the raw
        # pymysqlreplication event
        if self.subscribers('WriteRowsEvent'):
            self.stat_write_rows_event_all_rows(mysql_event=mysql_event)

            batch_event = Event()
            batch_event.schema = mysql_event.schema
            batch_event.table = mysql_event.table
            batch_event.pymysqlreplication_event = mysql_event

            self.process_first_event(event=batch_event)
            self.notify('WriteRowsEvent', event=batch_event)

        # per-row subscribers get one Event per row, carrying only that
        # row's values
        if self.subscribers('WriteRowsEvent.EachRow'):
            self.stat_write_rows_event_each_row()

            for row in mysql_event.rows:
                self.stat_write_rows_event_each_row_for_each_row()

                row_event = Event()
                row_event.schema = mysql_event.schema
                row_event.table = mysql_event.table
                row_event.row = row['values']

                self.process_first_event(event=row_event)
                self.notify('WriteRowsEvent.EachRow', event=row_event)

        self.stat_write_rows_event_finalyse()
    def migrate_one_table_data(self, db=None, table=None):
        """
        Migrate one table from MySQL into ClickHouse, resuming from the
        last saved checkpoint when one exists.
        :param db: database name
        :param table: table name
        :return: number of migrated rows
        :raises Exception: when the copy fails; the original error is chained
        """

        # build SQL statement
        full_table_name = self.create_full_table_name(db=db, table=table)
        fs, columns = self.get_columns(db, full_table_name)

        # checkpoint of the last migrated row id (0 means start from scratch)
        last_id = self.get_checkpoints(db, table)

        sql = "SELECT {0} FROM {1}".format(",".join(columns), full_table_name)

        # Collect all filter conditions and emit a single WHERE clause.
        # BUGFIX: the original code appended a second bare "WHERE" keyword
        # when both a configured where-clause and a checkpoint were present,
        # which produced invalid SQL.
        conditions = []
        if db in self.where_clauses and table in self.where_clauses[db]:
            conditions.append(self.where_clauses[db][table])
        if int(last_id) > 0:
            # assumes the table has an auto-increment 'id' column
            conditions.append("id > {}".format(last_id))
        if conditions:
            sql += " WHERE {}".format(" AND ".join(conditions))

        try:
            logging.info("migrate_table. sql=%s", sql)
            # SSDictCursor streams rows instead of buffering the whole result
            self.client.cursorclass = SSDictCursor
            self.client.connect(db=db)
            self.client.cursor.execute(sql)
            cnt = 0
            while True:
                # fetch multiple rows from MySQL
                rows = self.client.cursor.fetchmany(self.pool_max_rows_num)
                if not rows:
                    break

                # insert Event with multiple rows into ClickHouse writer
                event = Event()
                event.schema = db
                event.table = table
                event.rows = rows
                self.chwriter.insert(event, fs)
                self.chwriter.flush()

                # persist the checkpoint after each flushed batch so a
                # restart resumes instead of re-copying
                last_row = rows[-1]
                if "id" in last_row:
                    self.save_last_checkpoint(db, table, last_row["id"])
                cnt += len(rows)
        except Exception as ex:
            logging.critical("Critical error: {}".format(str(ex)))
            # chain the original exception so the root cause is not lost
            raise Exception("Can not migrate table on db={} table={}".format(
                db,
                table,
            )) from ex

        return cnt
Example #3
0
    def migrate_one_table_data(self, db=None, table=None):
        """
        Migrate one table from MySQL into ClickHouse.
        :param db: database name
        :param table: table name
        :return: number of migrated rows
        :raises Exception: when the copy fails; the original error is chained
        """

        # SSDictCursor streams rows instead of buffering the whole result
        self.client.cursorclass = SSDictCursor
        self.client.connect(db=db)

        # build SQL statement
        sql = "SELECT * FROM {0}".format(
            self.create_full_table_name(db=db, table=table))
        # in case we have WHERE clause for this db.table - add it to SQL
        if db in self.where_clauses and table in self.where_clauses[db]:
            sql += " WHERE {}".format(self.where_clauses[db][table])

        # BUGFIX: initialize before the try-block; the except handler below
        # reports cnt, and in the original code it was unbound (raising
        # UnboundLocalError and masking the real error) whenever execute()
        # failed before the first batch.
        cnt = 0
        try:
            logging.info("migrate_table. sql={}".format(sql))
            self.client.cursor.execute(sql)
            while True:
                # fetch multiple rows from MySQL
                rows = self.client.cursor.fetchmany(self.pool_max_rows_num)
                if not rows:
                    break

                # insert Event with multiple rows into ClickHouse writer
                event = Event()
                event.schema = db
                event.table = table
                event.rows = rows
                self.chwriter.insert(event)
                self.chwriter.flush()

                cnt += len(rows)
        # BUGFIX: narrow from a bare 'except:' and chain the original error;
        # the password is no longer leaked into the exception message.
        except Exception as ex:
            raise Exception(
                "Can not migrate table on host={} user={} db={} table={} cnt={}"
                .format(self.host, self.user, db, table, cnt)) from ex

        return cnt
    def push(self):
        """
        Dispatch an Event describing this writer's file (path and field
        names) to the next writer in the chain. Does nothing unless both a
        downstream writer builder and field names are available.
        """
        if self.next_writer_builder and self.fieldnames:
            event = Event()
            event.schema = self.dst_schema
            event.table = self.dst_table
            event.filename = self.path
            event.fieldnames = self.fieldnames
            self.next_writer_builder.get().insert(event)
Example #5
0
 def flush(self, event):
     """
     Flush all buffered rows as one synthetic WriteRowsEvent through the reader.

     :param event: unused -- NOTE(review): it is immediately shadowed by the
                   batch Event built below; confirm whether it can be dropped
     :return: None
     """
     logging.info("start flush schema:{} table:{} rows:{}".format(
         self.schema, self.table, len(self.events)))
     # wrap every buffered row into a fake pymysqlreplication-style event
     batch_event = FakeRows()
     batch_event.rows = self.events
     # NOTE(review): this rebinding shadows the 'event' parameter
     event = Event()
     event.schema = self.schema
     event.table = self.table
     event.pymysqlreplication_event = batch_event
     event.fs = self.reader.get_field_schema_cache(self.schema, self.table)
     # dispatch through the reader as if it were a live binlog event
     self.reader.process_first_event(event=event)
     self.reader.notify('WriteRowsEvent', event=event)
     # reset the buffer and remember when the last flush happened
     self.events = []
     self.last_flush_time = time.time()
     # record the binlog position reached so far so it can be checkpointed
     self.reader.process_binlog_position(self.last_binlog_file_name,
                                         self.last_binlog_file_pos)
            self.file.flush()
            self.file.close()
            self.file = None
            self.writer = None

    def destroy(self):
        """
        Remove the underlying file from disk, but only when deletion was
        requested and the file actually exists. Closes the writer first.
        """
        if not (self.delete and os.path.isfile(self.path)):
            return
        self.close()
        os.remove(self.path)

if __name__ == '__main__':
    # Smoke-test: write two rows into a CSV file via CSVWriter.
    path = 'file.csv'

    writer = CSVWriter(path)
    writer.open()
    # One Event instance is reused for both rows; only row_converted changes.
    event = Event()
    for row in (
        {
            'a': 123,
            'b': 456,
            'c': 'qwe',
            'd': 'rty',
        },
        {
            'a': 789,
            'b': 987,
            'c': 'asd',
            'd': 'fgh',
        },
    ):
        event.row_converted = row
        writer.insert(event)
    writer.close()
Example #7
0
    def process_update_rows_event(self, mysql_event, file, pos):
        """
        Process specific MySQL event - UpdateRowsEvent.

        Each row's post-update values ('after_values') are exposed under the
        'values' key, then the whole event is handed to the cache pool
        together with its binlog position. No direct subscriber dispatch
        happens here.

        :param mysql_event: UpdateRowsEvent instance
        :param file: current binlog file name
        :param pos: current binlog position
        :return: None
        """
        if self.tables_prefixes:
            # we have prefixes specified
            # need to find whether current event is produced by table in 'looking-into-tables' list
            if not self.is_table_listened(mysql_event.table):
                # this table is not listened
                # processing is over - just skip event
                return

        # expose post-update values under the key the write path expects
        for row in mysql_event.rows:
            row["values"] = row["after_values"]

        self.cache_pool.push_event(mysql_event.schema, mysql_event.table,
                                   mysql_event, file, pos)

        # NOTE: the original implementation carried a large block of
        # subscriber-dispatch code after an unconditional 'return' here;
        # it was unreachable dead code and has been removed.
        return