def increment_fks(self):
    """Shift every configured FK column by ``self._increment_value``.

    For each entry of ``self._config.fk_mapping`` one ``UPDATE`` statement is
    built that adds the increment to all columns listed under ``'fields'``
    and is executed on ``self._cursor``.  Entries whose ``'fields'`` is
    ``None`` or empty are skipped.  The statement is stored in
    ``self._logger.qs`` before execution; any exception is forwarded to
    ``handle_exception`` together with ``self._conn``.
    """
    cur = self._cursor
    # NOTE(review): private cursor flag; presumably postpones warning
    # checks on the MySQLdb cursor -- confirm against the driver in use.
    cur._defer_warnings = True

    for table_data in self._config.fk_mapping:
        fields = table_data['fields']
        # None means "no fields"; an empty list would otherwise yield the
        # invalid statement "UPDATE `t` SET ".
        if not fields:
            continue

        set_clause = ", ".join(
            "`%(field)s` = `%(field)s` + %(step)d" % {
                "field": field,
                'step': self._increment_value
            }
            for field in fields
        )

        try:
            self._logger.qs = "UPDATE `%(table)s` SET %(set_clause)s" % {
                "table": table_data['table'],
                "set_clause": set_clause
            }
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while updating FKs in table `%s`" %
                (table_data['table']), e, self._conn)
def map_pks_to_target_on_unique_conflict(self):
    """Re-point source PKs at destination rows with the same unique values.

    Only tables with exactly one primary-key column are processed.  For each
    entry of the table's ``"indexes"`` map, source rows whose index columns
    all equal those of a row in the destination database are selected, and
    the source row's PK is updated to the destination row's PK.  Every new PK
    is appended, as a string, to the table's
    ``"pk_changed_to_resolve_unique_conficts"`` list in ``self._db_map``.
    Exceptions are forwarded to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor
    # Second cursor for the UPDATEs, presumably so they do not disturb the
    # result set that is still being fetched from ``cur``.
    update_cur = self._conn.cursor()

    for table_name, table_map in self._db_map.items():
        pks = list(table_map["primary"])
        if len(pks) != 1:
            continue
        pk_col = pks[0]

        for index_name, columns in table_map["indexes"].items():
            new_pk = old_pk = ""
            try:
                # Get all rows that have the same unique value as our destination table
                join_condition = " AND ".join(
                    "(t1.`%(column)s` = t2.`%(column)s` AND t2.`%(column)s` is not null)"
                    % {"column": column}
                    for column in columns
                )
                self._logger.qs = (
                    "SELECT t1.`%(pk_col)s` as old_pk, t2.`%(pk_col)s` as new_pk "
                    "FROM `%(table)s` t1 "
                    "LEFT JOIN `%(destination_db)s`.`%(table)s` t2 ON (%(join)s) "
                    "WHERE t2.`%(pk_col)s` is not null"
                    % {
                        "destination_db": self._destination_db["db"],
                        "table": table_name,
                        "pk_col": pk_col,
                        "join": join_condition,
                    }
                )
                cur.execute(self._logger.qs)

                # Update all those rows PKs - to trigger the CASCADE on all pointers
                while True:
                    row = cur.fetchone()
                    if not row:
                        break
                    new_pk, old_pk = row["new_pk"], row["old_pk"]
                    self._logger.qs = (
                        "UPDATE `%(table)s` set `%(pk_col)s`=%(new_pk)s where `%(pk_col)s`=%(old_pk)s"
                        % {"table": table_name, "pk_col": pk_col, "new_pk": new_pk, "old_pk": old_pk}
                    )
                    update_cur.execute(self._logger.qs)
                    self._db_map[table_name]["pk_changed_to_resolve_unique_conficts"].append(str(new_pk))
            except Exception as e:
                handle_exception(
                    "There was an error while normalizing unique index `%s`.`%s` from values '%s' to value '%s'"
                    % (table_name, index_name, old_pk, new_pk),
                    e,
                    self._conn,
                )
# NOTE(review): in the original snippet this method is followed by a bare
# triple-quote (""") that opens a block lying outside this view; it is not
# part of the method and is not reproduced here.
def null_orphaned_fks(self):
    """Overwrite FK values that have no matching parent row.

    For every column listed in a table's ``"fk_host"`` map an ``UPDATE`` sets
    the column to the replacement found in
    ``self._orphaned_rows_update_values["columns"]`` (or SQL ``null`` when the
    column has no entry) on rows for which no parent row exists.  If that
    statement raises ``MySQLdb.Warning`` or ``MySQLdb.IntegrityError`` the
    offending rows are deleted instead.  Any other exception, or a failure of
    the fallback ``DELETE``, is forwarded to ``handle_exception``.
    """
    cur = self._cursor
    mapping = self._orphaned_rows_update_values["columns"]

    for table_name, table_map in self._db_map.items():
        for col_name, fk_data in table_map["fk_host"].items():
            params = {
                "child": table_name,
                "child_col": col_name,
                "parent": fk_data["parent"],
                "parent_col": fk_data["parent_col"],
                # Replacement is spliced into the SQL verbatim.
                "value": mapping.get(col_name, "null"),
            }
            self._logger.qs = (
                "UPDATE `%(child)s` c set c.`%(child_col)s`=%(value)s WHERE not exists (select * from `%(parent)s` p where p.`%(parent_col)s`=c.`%(child_col)s` limit 1)"
                % params
            )
            try:
                try:
                    cur.execute(self._logger.qs)
                except (MySQLdb.Warning, MySQLdb.IntegrityError):
                    # If nulling failed, let's delete problematic rows
                    self._logger.qs = (
                        "DELETE FROM `%(child)s` WHERE not exists (select * from `%(parent)s` p where p.`%(parent_col)s`=`%(child)s`.`%(child_col)s` limit 1)"
                        % params
                    )
                    cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception(
                    "There was an error while nulling orphaned FK on `%s`.`%s`" % (table_name, col_name),
                    e,
                    self._conn,
                )
def convert_mapped_fks_to_real_fks(self):
    """Create real FOREIGN KEY constraints for the mapped FK columns.

    For each column in a table's ``"fk_create"`` map an
    ``ALTER TABLE ... ADD FOREIGN KEY ... ON UPDATE CASCADE`` is executed.
    On success the entry is moved from ``"fk_create"`` to ``"fk_host"`` in
    ``self._db_map``.  Exceptions are forwarded to ``handle_exception``.
    """
    cur = self._cursor

    for table_name, table_map in self._db_map.items():
        # Iterate over a snapshot: entries are deleted from "fk_create"
        # inside the loop, which is illegal while iterating a live view.
        for col_name, fk_data in list(table_map["fk_create"].items()):
            constraint_name = ""
            try:
                # max length of constraint name is 64
                constraint_name = "%s_%s_dbmerge" % (
                    table_name[0:25],
                    col_name[0:25],
                )
                self._logger.qs = (
                    "alter table `%s` add foreign key `%s` (`%s`) references `%s` (`%s`) on update cascade"
                    % (table_name, constraint_name, col_name, fk_data["parent"], fk_data["parent_col"])
                )
                cur.execute(self._logger.qs)

                self._db_map[table_name]["fk_host"][col_name] = fk_data
                del self._db_map[table_name]["fk_create"][col_name]
            except Exception as e:
                handle_exception(
                    "There was an error while creating new FK `%s` on `%s`.`%s`"
                    % (constraint_name, table_name, col_name),
                    e,
                    self._conn,
                )
def convert_fks_to_update_cascade(self):
    """Re-create every known FK constraint with ``ON UPDATE CASCADE``.

    For each column in a table's ``"fk_host"`` map the existing constraint
    (``fk_data["constraint_name"]``) is dropped and then added again under
    the same name with ``on update cascade``.  Both statements run on
    ``self._cursor``; exceptions are forwarded to ``handle_exception``.
    """
    cur = self._cursor

    for table_name, table_map in self._db_map.items():
        for col_name, fk_data in table_map["fk_host"].items():
            constraint = fk_data["constraint_name"]
            try:
                self._logger.qs = "alter table `%s` drop foreign key `%s`" % (
                    table_name,
                    constraint,
                )
                cur.execute(self._logger.qs)

                self._logger.qs = (
                    "alter table `%s` add foreign key `%s` (`%s`) references `%s` (`%s`) on update cascade"
                    % (table_name, constraint, col_name, fk_data["parent"], fk_data["parent_col"])
                )
                cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception(
                    "There was an error while converting FK `%s` on `%s`.`%s` to ON UPDATE CASCADE"
                    % (fk_data["constraint_name"], table_name, col_name),
                    e,
                    self._conn,
                )
def null_orphaned_fks(self):
    """Overwrite FK values per table, with a separate path for self-references.

    For every column in a table's ``'fk_host'`` map the column is set to the
    replacement from ``self._orphaned_rows_update_values['columns']`` (SQL
    ``null`` when the column has no entry).  When the FK points at another
    table only rows without a matching parent are updated; when it points at
    its own table a join-based ``UPDATE`` is used instead.  If the statement
    raises ``MySQLdb.Warning`` or ``MySQLdb.IntegrityError`` the rows without
    a parent are deleted.  Other failures go to ``handle_exception``.
    """
    cur = self._cursor
    mapping = self._orphaned_rows_update_values['columns']

    for table_name, table_map in self._db_map.items():
        for col_name, fk_data in table_map['fk_host'].items():
            params = {
                'child': table_name,
                'child_col': col_name,
                'parent': fk_data['parent'],
                'parent_col': fk_data['parent_col'],
                # Replacement is spliced into the SQL verbatim.
                'value': mapping.get(col_name, "null")
            }

            if params['child'] == params['parent']:
                # NOTE(review): this INNER JOIN matches rows whose parent
                # DOES exist, so it rewrites non-orphaned values -- the
                # opposite of the NOT EXISTS branch below.  Confirm whether
                # that is intended for self-referencing tables.
                self._logger.qs = "UPDATE `%(child)s` c INNER JOIN `%(parent)s` p ON p.`%(parent_col)s`=c.`%(child_col)s` set c.`%(child_col)s`=%(value)s" % params
            else:
                self._logger.qs = "UPDATE `%(child)s` c set c.`%(child_col)s`=%(value)s WHERE not exists (select * from `%(parent)s` p where p.`%(parent_col)s`=c.`%(child_col)s` limit 1)" % params

            try:
                try:
                    cur.execute(self._logger.qs)
                except (MySQLdb.Warning, MySQLdb.IntegrityError):
                    # If nulling failed, let's delete problematic rows
                    self._logger.qs = "DELETE FROM `%(child)s` WHERE not exists (select * from `%(parent)s` p where p.`%(parent_col)s`=`%(child)s`.`%(child_col)s` limit 1)" % params
                    cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception(
                    "There was an error while nulling orphaned FK on `%s`.`%s`"
                    % (table_name, col_name), e, self._conn)
def execute_preprocess_queries(self):
    """Run each statement in ``self._config.preprocess_queries``.

    Every query is stored in ``self._logger.qs`` and executed on
    ``self._cursor`` in list order.  Any exception is forwarded to
    ``handle_exception`` together with ``self._conn``.
    """
    cur = self._cursor

    for q in self._config.preprocess_queries:
        try:
            self._logger.qs = q
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while executing preprocess_queries\nPlease fix your config and try again",
                e, self._conn)
def execute_preprocess_queries(self):
    """Run each statement in ``self._config.preprocess_queries``.

    Sets ``_defer_warnings`` on ``self._cursor`` first, then stores every
    query in ``self._logger.qs`` and executes it in list order.  Any
    exception is forwarded to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor
    # NOTE(review): private cursor flag; presumably postpones warning
    # checks on the MySQLdb cursor -- confirm against the driver in use.
    cur._defer_warnings = True

    for q in self._config.preprocess_queries:
        try:
            self._logger.qs = q
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while executing preprocess_queries\nPlease fix your config and try again",
                e, self._conn)
def copy_data_to_target(self):
    """Copy rows of every shared table from the source to the destination DB.

    Tables reported by ``self._source_mapper.get_non_overlapping_tables`` are
    logged and skipped.  For each remaining table the columns present in both
    databases are copied with ``INSERT IGNORE ... SELECT``; columns present
    on one side only are logged.  Rows whose PK is listed in the table's
    ``"pk_changed_to_resolve_unique_conficts"`` are excluded via a
    ``WHERE ... NOT IN`` clause.  A table without any shared column raises an
    ``Exception`` that, like every other failure, is forwarded to
    ``handle_exception``.
    """
    cur = self._cursor

    diff_tables = self._source_mapper.get_non_overlapping_tables(self._destination_db_map)
    for k, v in diff_tables.items():
        if v:
            self._logger.log("----> Skipping some missing tables in %s database: %s; " % (k, v))

    # Copy all the data to destination table
    for table_name, table_map in self._db_map.items():
        if any(table_name in missing for missing in diff_tables.values()):
            continue

        try:
            where = ""
            remapped_pks = table_map["pk_changed_to_resolve_unique_conficts"]
            if remapped_pks:
                where = "WHERE %(pk_col)s NOT IN (%(ids)s)" % {
                    "pk_col": list(table_map["primary"])[0],
                    "ids": ",".join(remapped_pks),
                }

            diff_columns = self._source_mapper.get_non_overlapping_columns(self._destination_db_map, table_name)
            for k, v in diff_columns.items():
                if v:
                    self._logger.log(
                        "----> Skipping some missing columns in %s database in table %s: %s; " % (k, table_name, v)
                    )

            columns = self._source_mapper.get_overlapping_columns(self._destination_db_map, table_name)
            if not columns:
                # The table name is interpolated here; the placeholder was
                # previously left unfilled.
                raise Exception(
                    "Table %s have no intersecting column in merged database and destination database"
                    % table_name
                )

            self._logger.qs = (
                "INSERT IGNORE INTO `%(destination_db)s`.`%(table)s` (%(columns)s) SELECT %(columns)s FROM `%(source_db)s`.`%(table)s` %(where)s"
                % {
                    "destination_db": self._destination_db["db"],
                    "source_db": self._source_db["db"],
                    "table": table_name,
                    "where": where,
                    "columns": "`%s`" % ("`,`".join(columns)),
                }
            )
            cur.execute(self._logger.qs)
        except Exception as e:
            hint = (
                "--> HINT: Looks like you runned this script twice on the same database\n"
                if "Duplicate" in "%s" % e
                else ""
            )
            handle_exception(
                ("There was an error while moving data between databases. Table: `%s`.\n" + hint) % (table_name),
                e,
                self._conn,
            )
def map_pks_to_target_on_unique_conflict(self):
    """Re-point source PKs at destination rows with the same unique values.

    Tables whose ``'primary'`` map does not hold exactly one column are
    skipped.  For each entry of the table's ``'indexes'`` map, source rows
    whose index columns all equal those of a destination row are selected and
    their PK is updated to the destination PK.  Each new PK is appended, as a
    string, to ``'pk_changed_to_resolve_unique_conficts'`` for the table.
    Exceptions are forwarded to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor
    # Second cursor for the UPDATEs, presumably so they do not disturb the
    # result set that is still being fetched from ``cur``.
    update_cur = self._conn.cursor()

    for table_name, table_map in self._db_map.items():
        pks = list(table_map['primary'])
        if len(pks) != 1:
            continue
        pk_col = pks[0]

        for index_name, columns in table_map['indexes'].items():
            new_pk = old_pk = ""
            try:
                # Get all rows that have the same unique value as our destination table
                join_condition = " AND ".join(
                    "(t1.`%(column)s` = t2.`%(column)s` AND t2.`%(column)s` is not null)" % {
                        'column': column}
                    for column in columns)
                self._logger.qs = "SELECT t1.`%(pk_col)s` as old_pk, t2.`%(pk_col)s` as new_pk " \
                                  "FROM `%(table)s` t1 " \
                                  "LEFT JOIN `%(destination_db)s`.`%(table)s` t2 ON (%(join)s) " \
                                  "WHERE t2.`%(pk_col)s` is not null" % {
                                      'destination_db': self._destination_db['db'],
                                      "table": table_name,
                                      "pk_col": pk_col,
                                      "join": join_condition
                                  }
                cur.execute(self._logger.qs)

                # Update all those rows PKs - to trigger the CASCADE on all pointers
                while True:
                    row = cur.fetchone()
                    if not row:
                        break
                    new_pk, old_pk = row['new_pk'], row['old_pk']
                    self._logger.qs = "UPDATE `%(table)s` set `%(pk_col)s`=%(new_pk)s where `%(pk_col)s`=%(old_pk)s" % {
                        'table': table_name,
                        'pk_col': pk_col,
                        'new_pk': new_pk,
                        'old_pk': old_pk,
                    }
                    update_cur.execute(self._logger.qs)
                    self._db_map[table_name]['pk_changed_to_resolve_unique_conficts'].append(str(new_pk))
            except Exception as e:
                handle_exception(
                    "There was an error while normalizing unique index `%s`.`%s` from values '%s' to value '%s'" % (
                        table_name, index_name, old_pk, new_pk), e, self._conn)
# NOTE(review): in the original snippet this method is followed by a bare
# triple-quote (""") that opens a block lying outside this view; it is not
# part of the method and is not reproduced here.
def convert_tables_to_innodb(self):
    """Switch every table named in ``self._db_map`` to the InnoDB engine.

    One ``alter table ... engine InnoDB`` statement per table is stored in
    ``self._logger.qs`` and executed on ``self._cursor``.  Exceptions are
    forwarded to ``handle_exception`` together with ``self._conn``.
    """
    cur = self._cursor

    # Only the table names are needed; the per-table maps are not read.
    for table_name in self._db_map:
        try:
            self._logger.qs = "alter table `%s` engine InnoDB" % (table_name)
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while converting table `%s` to InnoDB\nPlease fix your schema and try again" % table_name,
                e, self._conn)
def execute_postrocess_queries_target(self):
    """Run each statement in ``self._config.postprocess_queries_target``.

    Sets ``_defer_warnings`` on ``self._cursor`` first, then stores every
    query in ``self._logger.qs`` and executes it in list order.  Any
    exception is forwarded to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor
    # NOTE(review): private cursor flag; presumably postpones warning
    # checks on the MySQLdb cursor -- confirm against the driver in use.
    cur._defer_warnings = True

    for q in self._config.postprocess_queries_target:
        try:
            self._logger.qs = q
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while executing postprocess_queries_target\nPlease fix your config and try again",
                e, self._conn)
def execute_preprocess_queries(self):
    """Execute the configured pre-processing SQL statements in order.

    Each entry of ``self._config.preprocess_queries`` is recorded in
    ``self._logger.qs`` and then run on ``self._cursor``.  A failure of any
    statement is reported through ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor

    for q in self._config.preprocess_queries:
        try:
            self._logger.qs = q
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while executing preprocess_queries\nPlease fix your config and try again",
                e,
                self._conn,
            )
def copy_data_to_target(self):
    """Copy rows of every shared table from the source to the destination DB.

    Tables reported by ``self._source_mapper.get_non_overlapping_tables`` are
    logged and skipped.  When the source database is not
    ``self._config.main_db``, tables listed in ``self._config.exclude_tables``
    are skipped as well.  For each remaining table the columns present in
    both databases are copied with ``INSERT IGNORE ... SELECT``; columns
    present on one side only are logged.  A table without any shared column
    raises an ``Exception`` that, like every other failure, is forwarded to
    ``handle_exception``.
    """
    cur = self._cursor
    # NOTE(review): private cursor flag; presumably postpones warning
    # checks on the MySQLdb cursor -- confirm against the driver in use.
    cur._defer_warnings = True

    diff_tables = self._source_mapper.get_non_overlapping_tables(
        self._destination_db_map)
    for k, v in diff_tables.items():
        if v:
            self._logger.log(
                "----> Skipping some missing tables in %s database: %s; " %
                (k, v))

    # Copy all the data to destination table
    for table_name, table_map in self._db_map.items():
        if any(table_name in missing for missing in diff_tables.values()):
            continue

        # Excluded tables are copied only from the main database.
        if (self._source_db['db'] != self._config.main_db
                and table_name in self._config.exclude_tables):
            continue

        try:
            diff_columns = self._source_mapper.get_non_overlapping_columns(
                self._destination_db_map, table_name)
            for k, v in diff_columns.items():
                if v:
                    self._logger.log(
                        "----> Skipping some missing columns in %s database in table %s: %s; "
                        % (k, table_name, v))

            columns = self._source_mapper.get_overlapping_columns(
                self._destination_db_map, table_name)
            if not columns:
                # The table name is interpolated here; the placeholder was
                # previously left unfilled.
                raise Exception(
                    "Table %s have no intersecting column in merged database and destination database"
                    % table_name)

            self._logger.qs = "INSERT IGNORE INTO `%(destination_db)s`.`%(table)s` (%(columns)s) SELECT %(columns)s FROM `%(source_db)s`.`%(table)s`" % {
                'destination_db': self._destination_db['db'],
                'source_db': self._source_db['db'],
                'table': table_name,
                'columns': "`%s`" % ("`,`".join(columns))
            }
            cur.execute(self._logger.qs)
        except Exception as e:
            hint = "--> HINT: Looks like you runned this script twice on the same database\n" if "Duplicate" in "%s" % e else ""
            handle_exception((
                "There was an error while moving data between databases. Table: `%s`.\n"
                + hint) % (table_name), e, self._conn)
def convert_tables_to_innodb(self):
    """Issue ``alter table ... engine InnoDB`` for each table in ``self._db_map``.

    The statement is stored in ``self._logger.qs`` before being executed on
    ``self._cursor``.  A failing statement is reported through
    ``handle_exception`` together with ``self._conn``.
    """
    cur = self._cursor

    # Only the table names are needed; the per-table maps are not read.
    for table_name in self._db_map:
        try:
            self._logger.qs = "alter table `%s` engine InnoDB" % (table_name)
            cur.execute(self._logger.qs)
        except Exception as e:
            handle_exception(
                "There was an error while converting table `%s` to InnoDB\nPlease fix your schema and try again"
                % table_name,
                e,
                self._conn,
            )
def rollback_pks(self):
    """Subtract ``self._increment_value`` from every non-FK primary-key column.

    For each table in ``self._db_map`` and each column of its ``'primary'``
    map, an ``UPDATE`` lowering the column by the increment is executed on
    ``self._cursor``.  Primary-key columns that also appear in the table's
    ``'fk_host'`` map are left untouched.  Exceptions are forwarded to
    ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor

    # Return all PKs to their previous state: ID - increment
    for table_name, table_map in self._db_map.items():
        for col_name in table_map['primary']:
            # A PK column that is also a foreign key is not touched.
            if col_name in table_map['fk_host']:
                continue
            try:
                self._logger.qs = "UPDATE `%(table)s` SET `%(pk)s` = `%(pk)s` - %(step)d" % {
                    "table": table_name,
                    "pk": col_name,
                    'step': self._increment_value
                }
                cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception("There was an error while updating PK `%s`.`%s` to -%d + pk_value" % (
                    table_name, col_name, self._increment_value), e, self._conn)
def convert_fks_to_update_cascade(self):
    """Drop and re-add each known FK constraint with ``on update cascade``.

    Iterates the ``'fk_host'`` map of every table in ``self._db_map``.  For
    each column the constraint named ``fk_data['constraint_name']`` is
    dropped and then created again under the same name, referencing
    ``fk_data['parent']``.``fk_data['parent_col']``.  Exceptions are forwarded
    to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor

    for table_name, table_map in self._db_map.items():
        for col_name, fk_data in table_map['fk_host'].items():
            constraint = fk_data['constraint_name']
            try:
                self._logger.qs = "alter table `%s` drop foreign key `%s`" % (
                    table_name, constraint)
                cur.execute(self._logger.qs)

                self._logger.qs = "alter table `%s` add foreign key `%s` (`%s`) references `%s` (`%s`) on update cascade" % (
                    table_name, constraint, col_name, fk_data['parent'], fk_data['parent_col'])
                cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception("There was an error while converting FK `%s` on `%s`.`%s` to ON UPDATE CASCADE" % (
                    fk_data['constraint_name'], table_name, col_name), e, self._conn)
def increment_pks(self):
    """Add ``self._increment_value`` to every non-FK primary-key column.

    For each table in ``self._db_map`` and each column of its ``'primary'``
    map, an ``UPDATE`` raising the column by the increment is executed on
    ``self._cursor``.  Exceptions are forwarded to ``handle_exception`` with
    ``self._conn``.
    """
    cur = self._cursor

    # Update all numeric PKs to ID + increment
    for table_name, table_map in self._db_map.items():
        for col_name in table_map['primary']:
            # If current col is also a Foreign Key, we do not touch it
            if col_name in table_map['fk_host']:
                continue
            try:
                self._logger.qs = "UPDATE `%(table)s` SET `%(pk)s` = `%(pk)s` + %(step)d" % {
                    "table": table_name,
                    "pk": col_name,
                    'step': self._increment_value
                }
                cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception("There was an error while updating PK `%s`.`%s` to %d + pk_value" % (
                    table_name, col_name, self._increment_value), e, self._conn)
def convert_mapped_fks_to_real_fks(self):
    """Turn the columns listed under ``'fk_create'`` into real foreign keys.

    For each such column a constraint named
    ``<table[:25]>_<column[:25]>_dbmerge`` is added with
    ``on update cascade``.  After a successful statement the entry is moved
    from the table's ``'fk_create'`` map to its ``'fk_host'`` map.
    Exceptions are forwarded to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor

    for table_name, table_map in self._db_map.items():
        # Iterate over a snapshot: entries are deleted from 'fk_create'
        # inside the loop, which is illegal while iterating a live view.
        for col_name, fk_data in list(table_map['fk_create'].items()):
            constraint_name = ""
            try:
                # max length of constraint name is 64
                constraint_name = "%s_%s_dbmerge" % (
                    table_name[0:25], col_name[0:25])
                self._logger.qs = "alter table `%s` add foreign key `%s` (`%s`) references `%s` (`%s`) on update cascade" % (
                    table_name, constraint_name, col_name, fk_data['parent'], fk_data['parent_col'])
                cur.execute(self._logger.qs)

                self._db_map[table_name]['fk_host'][col_name] = fk_data
                del self._db_map[table_name]['fk_create'][col_name]
            except Exception as e:
                handle_exception("There was an error while creating new FK `%s` on `%s`.`%s`" % (
                    constraint_name, table_name, col_name), e, self._conn)
def copy_data_to_target(self):
    """Copy rows of every shared table from the source to the destination DB.

    Tables that ``self._source_mapper.get_non_overlapping_tables`` reports as
    missing on either side are logged and skipped.  For the rest, only the
    columns returned by ``get_overlapping_columns`` are copied using
    ``INSERT IGNORE ... SELECT``; one-sided columns are logged.  Rows whose
    PK appears in the table's ``'pk_changed_to_resolve_unique_conficts'``
    list are left out through a ``WHERE ... NOT IN`` clause.  A table with no
    shared column raises an ``Exception``; it and every other failure are
    forwarded to ``handle_exception``.
    """
    cur = self._cursor

    diff_tables = self._source_mapper.get_non_overlapping_tables(self._destination_db_map)
    for k, v in diff_tables.items():
        if v:
            self._logger.log("----> Skipping some missing tables in %s database: %s; " % (k, v))

    # Copy all the data to destination table
    for table_name, table_map in self._db_map.items():
        if any(table_name in missing for missing in diff_tables.values()):
            continue

        try:
            where = ""
            remapped_pks = table_map['pk_changed_to_resolve_unique_conficts']
            if remapped_pks:
                where = "WHERE %(pk_col)s NOT IN (%(ids)s)" % {
                    'pk_col': list(table_map['primary'])[0],
                    'ids': ",".join(remapped_pks)
                }

            diff_columns = self._source_mapper.get_non_overlapping_columns(self._destination_db_map, table_name)
            for k, v in diff_columns.items():
                if v:
                    self._logger.log(
                        "----> Skipping some missing columns in %s database in table %s: %s; " % (k, table_name, v))

            columns = self._source_mapper.get_overlapping_columns(self._destination_db_map, table_name)
            if not columns:
                # The table name is interpolated here; the placeholder was
                # previously left unfilled.
                raise Exception(
                    "Table %s have no intersecting column in merged database and destination database" % table_name)

            self._logger.qs = "INSERT IGNORE INTO `%(destination_db)s`.`%(table)s` (%(columns)s) SELECT %(columns)s FROM `%(source_db)s`.`%(table)s` %(where)s" % {
                'destination_db': self._destination_db['db'],
                'source_db': self._source_db['db'],
                'table': table_name,
                'where': where,
                'columns': "`%s`" % ("`,`".join(columns))
            }
            cur.execute(self._logger.qs)
        except Exception as e:
            hint = "--> HINT: Looks like you runned this script twice on the same database\n" if "Duplicate" in "%s" % e else ""
            handle_exception(
                ("There was an error while moving data between databases. Table: `%s`.\n" + hint) % (table_name),
                e, self._conn)
def increment_pks(self):
    """Raise every primary-key column by its table-specific increment.

    Tables listed in ``self._config.exclude_tables`` are skipped.  For each
    remaining table the step is obtained from
    ``self.get_increment_value_table(table_name)`` and added to every column
    of the table's ``'primary'`` map via ``UPDATE``.  Exceptions raised while
    building or executing a statement are forwarded to ``handle_exception``
    with ``self._conn``.
    """
    cur = self._cursor
    # NOTE(review): private cursor flag; presumably postpones warning
    # checks on the MySQLdb cursor -- confirm against the driver in use.
    cur._defer_warnings = True

    for table_name, table_map in self._db_map.items():
        if table_name in self._config.exclude_tables:
            continue

        increment_value = self.get_increment_value_table(table_name)
        for col_name in table_map['primary']:
            try:
                self._logger.qs = "UPDATE `%(table)s` SET `%(pk)s` = `%(pk)s` + %(step)d" % {
                    "table": table_name,
                    "pk": col_name,
                    'step': increment_value
                }
                cur.execute(self._logger.qs)
            except Exception as e:
                # Report the per-table step that was actually applied, not
                # the instance-wide default.
                handle_exception(
                    "There was an error while updating PK `%s`.`%s` to %d + pk_value"
                    % (table_name, col_name, increment_value), e,
                    self._conn)
def rollback_pks(self):
    """Undo the PK shift by subtracting ``self._increment_value``.

    Every column of each table's ``"primary"`` map is lowered by the
    increment with an ``UPDATE`` on ``self._cursor``, except columns that
    also appear in the table's ``"fk_host"`` map.  Exceptions are forwarded
    to ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor

    # Return all PKs to their previous state: ID - increment
    for table_name, table_map in self._db_map.items():
        for col_name in table_map["primary"]:
            # A PK column that is also a foreign key is not touched.
            if col_name in table_map["fk_host"]:
                continue
            try:
                self._logger.qs = "UPDATE `%(table)s` SET `%(pk)s` = `%(pk)s` - %(step)d" % {
                    "table": table_name,
                    "pk": col_name,
                    "step": self._increment_value,
                }
                cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception(
                    "There was an error while updating PK `%s`.`%s` to -%d + pk_value"
                    % (table_name, col_name, self._increment_value),
                    e,
                    self._conn,
                )
def increment_pks(self):
    """Shift every non-FK primary-key column up by ``self._increment_value``.

    Every column of each table's ``"primary"`` map is raised by the increment
    with an ``UPDATE`` on ``self._cursor``, except columns that also appear
    in the table's ``"fk_host"`` map.  Exceptions are forwarded to
    ``handle_exception`` with ``self._conn``.
    """
    cur = self._cursor

    # Update all numeric PKs to ID + increment
    for table_name, table_map in self._db_map.items():
        for col_name in table_map["primary"]:
            # If current col is also a Foreign Key, we do not touch it
            if col_name in table_map["fk_host"]:
                continue
            try:
                self._logger.qs = "UPDATE `%(table)s` SET `%(pk)s` = `%(pk)s` + %(step)d" % {
                    "table": table_name,
                    "pk": col_name,
                    "step": self._increment_value,
                }
                cur.execute(self._logger.qs)
            except Exception as e:
                handle_exception(
                    "There was an error while updating PK `%s`.`%s` to %d + pk_value"
                    % (table_name, col_name, self._increment_value),
                    e,
                    self._conn,
                )
# Script fragment: steps 2 and 3 of the merge run.  ``map_fks``, ``db_map``,
# ``config``, ``destination_db_map``, ``Merger``, ``MiniLogger`` and
# ``handle_exception`` are defined outside this view.
print("")
print("STEP 2. Map all the fields that looks like FKs but aren't stored as ones")
map_fks(db_map)

print("")
print("STEP 3. Actually merge all the databases")
print("")

counter = 0
for source_db in config.merged_dbs:
    counter = counter + 1
    # Reset per iteration so a failure before/inside ``Merger(...)`` cannot
    # hand the previous database's connection to the error handler.
    merger = None
    try:
        # Per-database settings override the shared defaults.
        source_db_tpl = copy.deepcopy(config.common_data)
        source_db_tpl.update(source_db)

        destination_db_tpl = copy.deepcopy(config.common_data)
        destination_db_tpl.update(config.destination_db)

        merger = Merger(destination_db_map, source_db_tpl, destination_db_tpl, config, counter, MiniLogger())
        merger.merge()
    except Exception as e:
        conn = merger._conn if merger is not None else None
        handle_exception("There was an unexpected error while merging db %s" % source_db['db'], e, conn)

print("Merge is finished")