Example No. 1
0
def verify_and_copy_table(table):
    """
        Verify that the specified table's row count on the source is within a
        configurable threshold of the target's and, if it is, copy it to the
        local database. If the table's schema on the source and target are
        equal and the source has not been modified more recently than the
        target, the copy is skipped. An incremental copy is attempted first;
        if it fails, a full copy is attempted.

        This is the main routine that the set of tables is mapped through by
        the multi-processing pool. Return codes:

         0 = successful copy
         1 = skipped copying table due to no change detected.
        -1 = failed validity check, source row count is too different than target
        -3 = both incremental and full copy attempts failed

        Any other return value is an unknown failure.
    """
    # Bind the named logger; the original discarded this return value and
    # relied on a module-level `logger` existing.
    logger = logging.getLogger('PyDBCopy')

    # Set up DB connections to both source and target DB.
    source_host = MySQLHost(settings.source_host, settings.source_user,
                            settings.source_password, settings.source_database)
    dest_host = MySQLHost(settings.target_host, settings.target_user,
                          settings.target_password, settings.target_database)

    # Guard against copying when source/target row counts diverge too much,
    # unless this table is explicitly exempted from verification.
    if table not in settings.tables_to_skip_verification:
        if not perform_validity_check(table, source_host, dest_host,
                                      settings.verify_threshold):
            return -1

    # Copy when change detection is disabled, the target table is missing,
    # the schemas differ, or the last-modified timestamps differ.
    if settings.no_last_mod_check \
       or not dest_host.table_exists(table) \
       or not schema_compare(table, source_host, dest_host, True) \
       or not is_last_mod_same(table, source_host, dest_host):

        copied = False
        try:
            if not settings.force_full:
                logger.info("Starting incremental copy of table %s from %s(%s) to %s(%s)" %
                            (table, source_host.database, source_host.host,
                             dest_host.database, dest_host.host))
                copied = perform_incremental_copy(table, source_host,
                                                  dest_host, settings.scp_user,
                                                  settings.dump_dir)
                if copied:
                    logger.info("Successful incremental copy of table %s" %
                                table)
                else:
                    # warning() replaces the deprecated warn() alias.
                    logger.warning("Failed incremental copy of table %s" % table)

            # Fall back to a full copy if the incremental copy was skipped
            # (force_full) or failed.
            if not copied:
                logger.info("Starting full copy of table %s from %s(%s) to %s(%s)" %
                            (table, source_host.database, source_host.host,
                             dest_host.database, dest_host.host))
                copied = perform_full_copy(table, source_host, dest_host,
                                           settings.scp_user,
                                           settings.dump_dir)
                if copied:
                    logger.info("Successful full copy of table %s" % table)
                else:
                    logger.error("Failed full copy of table %s" % table)
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # still propagate; any copy error is logged with its traceback.
            logger.error("Failed copy of table %s", table, exc_info=1)

        if not copied:
            return -3
    else:
        logger.info(
            "Skipping copying of table %s (source/dest have same row count and last mod date)"
            % table)
        return 1
    return 0
Example No. 2
0
def verify_and_copy_table(table):
    """
        Verify that the specified table's row count on the source is within a
        configurable threshold of the target's and, if it is, copy it to the
        local database. If the table's schema on the source and target are
        equal and the source has not been modified more recently than the
        target, the copy is skipped. An incremental copy is attempted first;
        if it fails, a full copy is attempted.

        This is the main routine that the set of tables is mapped through by
        the multi-processing pool. Return codes:

         0 = successful copy
         1 = skipped copying table due to no change detected.
        -1 = failed validity check, source row count is too different than target
        -3 = both incremental and full copy attempts failed

        Any other return value is an unknown failure.
    """
    # Bind the named logger; the original discarded this return value and
    # relied on a module-level `logger` existing.
    logger = logging.getLogger('PyDBCopy')

    # Set up DB connections to both source and target DB.
    source_host = MySQLHost(settings.source_host, settings.source_user,
                            settings.source_password, settings.source_database)
    dest_host = MySQLHost(settings.target_host, settings.target_user,
                          settings.target_password, settings.target_database)

    # Guard against copying when source/target row counts diverge too much,
    # unless this table is explicitly exempted from verification.
    if table not in settings.tables_to_skip_verification:
        if not perform_validity_check(table, source_host, dest_host,
                                      settings.verify_threshold):
            return -1

    # Copy when change detection is disabled, the target table is missing,
    # the schemas differ, or the last-modified timestamps differ.
    if settings.no_last_mod_check \
       or not dest_host.table_exists(table) \
       or not schema_compare(table, source_host, dest_host, True) \
       or not is_last_mod_same(table, source_host, dest_host):

        copied = False
        try:
            if not settings.force_full:
                logger.info("Starting incremental copy of table %s from %s(%s) to %s(%s)" %
                            (table, source_host.database, source_host.host,
                             dest_host.database, dest_host.host))
                copied = perform_incremental_copy(table, source_host, dest_host,
                                                  settings.scp_user,
                                                  settings.dump_dir)
                if copied:
                    logger.info("Successful incremental copy of table %s" % table)
                else:
                    # warning() replaces the deprecated warn() alias.
                    logger.warning("Failed incremental copy of table %s" % table)

            # Fall back to a full copy if the incremental copy was skipped
            # (force_full) or failed.
            if not copied:
                logger.info("Starting full copy of table %s from %s(%s) to %s(%s)" %
                            (table, source_host.database, source_host.host,
                             dest_host.database, dest_host.host))
                copied = perform_full_copy(table, source_host, dest_host,
                                           settings.scp_user, settings.dump_dir)
                if copied:
                    logger.info("Successful full copy of table %s" % table)
                else:
                    logger.error("Failed full copy of table %s" % table)
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # still propagate; any copy error is logged with its traceback.
            logger.error("Failed copy of table %s", table, exc_info=1)

        if not copied:
            return -3
    else:
        logger.info("Skipping copying of table %s (source/dest have same row count and last mod date)" % table)
        return 1
    return 0
Example No. 3
0
class MySQLHostTest(unittest.TestCase):
    """
        Integration tests for MySQLHost against live source/target databases.

        These tests need a world writable dump dir. The default directory is /share/mysql_dumps.
        Please keep in mind that issuing the SQL 'select into outfile' will cause mysql to write
        files as the user mysql is running as. The dump dir needs to be world writable so that
        these tests can clean up the temp files that the mysql user writes.
    """

    def setUp(self):
        # Load connection settings and open connections to both hosts.
        settings.read_properties("pydbcopy.conf")

        self.source_host = MySQLHost(settings.source_host, settings.source_user,
                                     settings.source_password, settings.source_database)
        self.dest_host = MySQLHost(settings.target_host, settings.target_user,
                                   settings.target_password, settings.target_database)

        # Just in case, tear down an old or canceled run before building
        # the fixture.
        self.tearDown()

        # Bring up the fixture tables on the source host.
        c = self.source_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")
        c.execute("create table if not exists tmp_pydbcopy_test ( id integer primary key, test_string varchar(50) )")
        c.execute("insert into tmp_pydbcopy_test (id,test_string) values (1,'test')")
        c.execute("create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (1,'test','123')")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (2,'test1','234')")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (3,'test2','345')")
        c.execute("create table if not exists tmp_pydbcopy_modified_test ( id integer primary key, test_string varchar(50), lastModifiedDate timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP )")
        c.execute("insert into tmp_pydbcopy_modified_test (id,test_string,lastModifiedDate) values (1,'test','2010-11-23 05:00:00')")
        c.close()

    def tearDown(self):
        # Drop the fixture tables on both the source and target hosts.
        c = self.source_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")
        c.execute("drop table if exists tmp_pydbcopy_test")
        c.execute("drop table if exists tmp_hashed_pydbcopy_test")
        c.execute("drop table if exists tmp_pydbcopy_modified_test")

        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")
        c.execute("drop table if exists tmp_pydbcopy_test")
        c.execute("drop table if exists tmp_hashed_pydbcopy_test")
        c.execute("drop table if exists tmp_pydbcopy_modified_test")
        c.close()

    def testTableExists(self):
        # Exact-name match only: near-miss names must not be reported.
        self.assertTrue(self.source_host.table_exists("tmp_pydbcopy_test"))
        self.assertFalse(self.source_host.table_exists("tmp_pydbcopy_tests"))
        self.assertFalse(self.source_host.table_exists("pydbcopy_test"))

    def testSelectIntoOutfile(self):
        filename = self.source_host.select_into_outfile("tmp_pydbcopy_test", None, settings.dump_dir)
        # BUG FIX: the original wrote `f.close` (no parentheses), which never
        # closed the file; a context manager guarantees the handle is released
        # before the dump file is removed.
        with open(filename) as f:
            filecontents = f.read()
        os.remove(filename)
        self.assertEqual(filecontents, "1\ttest\n")

    def testLoadDataInFile(self):
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute("create table if not exists tmp_pydbcopy_test ( id integer primary key, test_string varchar(50) )")

        filename = "fixtures/testLoadDataInFile.csv"

        self.dest_host.load_data_in_file("tmp_pydbcopy_test", filename)

        c.execute("select * from tmp_pydbcopy_test")
        rows = c.fetchone()

        self.assertEqual(rows[0], 1)
        self.assertEqual(rows[1], 'test')

        c.close()

    def testTruncateTable(self):
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute("create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (1,'test','123')")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (2,'test1','234')")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (3,'test2','345')")

        c.execute("select * from tmp_hashed_pydbcopy_test")
        rows = c.fetchall()

        self.assertEqual(len(rows), 3)

        self.dest_host.truncate_table("tmp_hashed_pydbcopy_test")

        c.execute("select * from tmp_hashed_pydbcopy_test")
        rows = c.fetchall()

        self.assertEqual(len(rows), 0)

        c.close()

    def testGetTableStructure(self):
        expected = "CREATE TABLE `tmp_pydbcopy_test` (\n" \
                 + "  `id` int(11) NOT NULL,\n" \
                 + "  `test_string` varchar(50) DEFAULT NULL,\n" \
                 + "  PRIMARY KEY (`id`)\n" \
                 + ") ENGINE=MyISAM DEFAULT CHARSET=utf8"
        self.assertEqual(self.source_host.get_table_structure("tmp_pydbcopy_test"), expected)

    def testGetTableMaxLastModified(self):
        # -1 signals a table without a usable lastModifiedDate column.
        failure = -1
        self.assertEqual(self.source_host.get_table_max_modified("RunningJobs"), failure)
        expected = datetime.datetime(2010, 11, 23, 5, 0)
        self.assertEqual(self.source_host.get_table_max_modified("tmp_pydbcopy_modified_test"), expected)

    def testCreateTableWithSchema(self):
        c = self.source_host.conn.cursor()

        expected = "CREATE TABLE `tmp_pydbcopy_test` (\n" \
                 + "  `id` int(11) NOT NULL,\n" \
                 + "  `test_string` varchar(50) DEFAULT NULL,\n" \
                 + "  PRIMARY KEY (`id`)\n" \
                 + ") ENGINE=MyISAM DEFAULT CHARSET=utf8"

        self.dest_host.create_table_with_schema("tmp_pydbcopy_test", expected)

        c.execute("show create table %s" % ("tmp_pydbcopy_test"))
        rows = c.fetchone()
        struct = rows[1]

        self.assertEqual(struct, expected)

        c.close()

    def testGetCurrentHashSet(self):
        expected = set(["123", "234", "345"])
        self.assertEqual(self.source_host.get_current_hash_set("tmp_hashed_pydbcopy_test"), expected)

    def testDeleteRecords(self):
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute("create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (1,'test','123')")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (2,'test1','234')")
        c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (3,'test2','345')")

        # Delete by hash; only the row with hash '234' should survive.
        self.dest_host.delete_records("tmp_hashed_pydbcopy_test", set(["123", "345"]))

        c.execute("select * from tmp_hashed_pydbcopy_test")
        rows = c.fetchall()

        self.assertEqual(rows[0][0], 2)
        self.assertEqual(rows[0][1], 'test1')
        self.assertEqual(rows[0][2], '234')

        c.close()

    def testDeleteRecordsBatching(self):
        # A delete set of ~21k hashes forces delete_records to batch its
        # DELETE statements rather than issue one giant IN clause.
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute("create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )")

        cur_record = 0
        delete_set = set()
        while cur_record <= 25000:
            c.execute("insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (%s,'%s','%s')" % (cur_record, cur_record, cur_record))
            if cur_record > 0 and cur_record < 21000:
                delete_set.add("%s" % cur_record)
            cur_record = cur_record + 1

        self.dest_host.delete_records("tmp_hashed_pydbcopy_test", delete_set)

        c.execute("select * from tmp_hashed_pydbcopy_test order by id")
        rows = c.fetchall()

        # Rows 0 and 21000..25000 (4002 rows) must remain.
        self.assertEqual(len(rows), 4002)

        self.assertEqual(rows[0][0], 0)
        self.assertEqual(rows[1][0], 21000)
        self.assertEqual(rows[2][0], 21001)
        self.assertEqual(rows[4001][0], 25000)

        c.close()

    def testGetRowCount(self):
        self.assertEqual(self.source_host.get_row_count("tmp_hashed_pydbcopy_test"), 3)
Example No. 4
0
class MySQLHostTest(unittest.TestCase):
    """
        Integration tests for MySQLHost against live source/target databases.

        These tests need a world writable dump dir. The default directory is /share/mysql_dumps.
        Please keep in mind that issuing the SQL 'select into outfile' will cause mysql to write
        files as the user mysql is running as. The dump dir needs to be world writable so that
        these tests can clean up the temp files that the mysql user writes.
    """

    def setUp(self):
        # Load connection settings and open connections to both hosts.
        settings.read_properties("pydbcopy.conf")

        self.source_host = MySQLHost(settings.source_host, settings.source_user,
                                     settings.source_password, settings.source_database)
        self.dest_host = MySQLHost(settings.target_host, settings.target_user,
                                   settings.target_password, settings.target_database)

        # Just in case, tear down an old or canceled run before building
        # the fixture.
        self.tearDown()

        # Bring up the fixture tables on the source host.
        c = self.source_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")
        c.execute(
            "create table if not exists tmp_pydbcopy_test ( id integer primary key, test_string varchar(50) )"
        )
        c.execute(
            "insert into tmp_pydbcopy_test (id,test_string) values (1,'test')")
        c.execute(
            "create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (1,'test','123')"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (2,'test1','234')"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (3,'test2','345')"
        )
        c.execute(
            "create table if not exists tmp_pydbcopy_modified_test ( id integer primary key, test_string varchar(50), lastModifiedDate timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP )"
        )
        c.execute(
            "insert into tmp_pydbcopy_modified_test (id,test_string,lastModifiedDate) values (1,'test','2010-11-23 05:00:00')"
        )
        c.close()

    def tearDown(self):
        # Drop the fixture tables on both the source and target hosts.
        c = self.source_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")
        c.execute("drop table if exists tmp_pydbcopy_test")
        c.execute("drop table if exists tmp_hashed_pydbcopy_test")
        c.execute("drop table if exists tmp_pydbcopy_modified_test")

        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")
        c.execute("drop table if exists tmp_pydbcopy_test")
        c.execute("drop table if exists tmp_hashed_pydbcopy_test")
        c.execute("drop table if exists tmp_pydbcopy_modified_test")
        c.close()

    def testTableExists(self):
        # Exact-name match only: near-miss names must not be reported.
        self.assertTrue(self.source_host.table_exists("tmp_pydbcopy_test"))
        self.assertFalse(self.source_host.table_exists("tmp_pydbcopy_tests"))
        self.assertFalse(self.source_host.table_exists("pydbcopy_test"))

    def testSelectIntoOutfile(self):
        filename = self.source_host.select_into_outfile(
            "tmp_pydbcopy_test", None, settings.dump_dir)
        # BUG FIX: the original wrote `f.close` (no parentheses), which never
        # closed the file; a context manager guarantees the handle is released
        # before the dump file is removed.
        with open(filename) as f:
            filecontents = f.read()
        os.remove(filename)
        self.assertEqual(filecontents, "1\ttest\n")

    def testLoadDataInFile(self):
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute(
            "create table if not exists tmp_pydbcopy_test ( id integer primary key, test_string varchar(50) )"
        )

        filename = "fixtures/testLoadDataInFile.csv"

        self.dest_host.load_data_in_file("tmp_pydbcopy_test", filename)

        c.execute("select * from tmp_pydbcopy_test")
        rows = c.fetchone()

        self.assertEqual(rows[0], 1)
        self.assertEqual(rows[1], 'test')

        c.close()

    def testTruncateTable(self):
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute(
            "create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (1,'test','123')"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (2,'test1','234')"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (3,'test2','345')"
        )

        c.execute("select * from tmp_hashed_pydbcopy_test")
        rows = c.fetchall()

        self.assertEqual(len(rows), 3)

        self.dest_host.truncate_table("tmp_hashed_pydbcopy_test")

        c.execute("select * from tmp_hashed_pydbcopy_test")
        rows = c.fetchall()

        self.assertEqual(len(rows), 0)

        c.close()

    def testGetTableStructure(self):
        expected = "CREATE TABLE `tmp_pydbcopy_test` (\n" \
                 + "  `id` int(11) NOT NULL,\n" \
                 + "  `test_string` varchar(50) DEFAULT NULL,\n" \
                 + "  PRIMARY KEY (`id`)\n" \
                 + ") ENGINE=MyISAM DEFAULT CHARSET=utf8"
        self.assertEqual(
            self.source_host.get_table_structure("tmp_pydbcopy_test"),
            expected)

    def testGetTableMaxLastModified(self):
        # -1 signals a table without a usable lastModifiedDate column.
        failure = -1
        self.assertEqual(
            self.source_host.get_table_max_modified("RunningJobs"), failure)
        expected = datetime.datetime(2010, 11, 23, 5, 0)
        self.assertEqual(
            self.source_host.get_table_max_modified(
                "tmp_pydbcopy_modified_test"), expected)

    def testCreateTableWithSchema(self):
        c = self.source_host.conn.cursor()

        expected = "CREATE TABLE `tmp_pydbcopy_test` (\n" \
                 + "  `id` int(11) NOT NULL,\n" \
                 + "  `test_string` varchar(50) DEFAULT NULL,\n" \
                 + "  PRIMARY KEY (`id`)\n" \
                 + ") ENGINE=MyISAM DEFAULT CHARSET=utf8"

        self.dest_host.create_table_with_schema("tmp_pydbcopy_test", expected)

        c.execute("show create table %s" % ("tmp_pydbcopy_test"))
        rows = c.fetchone()
        struct = rows[1]

        self.assertEqual(struct, expected)

        c.close()

    def testGetCurrentHashSet(self):
        expected = set(["123", "234", "345"])
        self.assertEqual(
            self.source_host.get_current_hash_set("tmp_hashed_pydbcopy_test"),
            expected)

    def testDeleteRecords(self):
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute(
            "create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (1,'test','123')"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (2,'test1','234')"
        )
        c.execute(
            "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (3,'test2','345')"
        )

        # Delete by hash; only the row with hash '234' should survive.
        self.dest_host.delete_records("tmp_hashed_pydbcopy_test",
                                      set(["123", "345"]))

        c.execute("select * from tmp_hashed_pydbcopy_test")
        rows = c.fetchall()

        self.assertEqual(rows[0][0], 2)
        self.assertEqual(rows[0][1], 'test1')
        self.assertEqual(rows[0][2], '234')

        c.close()

    def testDeleteRecordsBatching(self):
        # A delete set of ~21k hashes forces delete_records to batch its
        # DELETE statements rather than issue one giant IN clause.
        c = self.dest_host.conn.cursor()
        c.execute("SET AUTOCOMMIT=1")

        c.execute(
            "create table if not exists tmp_hashed_pydbcopy_test ( id integer primary key, test_string varchar(50), fieldHash varchar(50) )"
        )

        cur_record = 0
        delete_set = set()
        while cur_record <= 25000:
            c.execute(
                "insert into tmp_hashed_pydbcopy_test (id,test_string,fieldHash) values (%s,'%s','%s')"
                % (cur_record, cur_record, cur_record))
            if cur_record > 0 and cur_record < 21000:
                delete_set.add("%s" % cur_record)
            cur_record = cur_record + 1

        self.dest_host.delete_records("tmp_hashed_pydbcopy_test", delete_set)

        c.execute("select * from tmp_hashed_pydbcopy_test order by id")
        rows = c.fetchall()

        # Rows 0 and 21000..25000 (4002 rows) must remain.
        self.assertEqual(len(rows), 4002)

        self.assertEqual(rows[0][0], 0)
        self.assertEqual(rows[1][0], 21000)
        self.assertEqual(rows[2][0], 21001)
        self.assertEqual(rows[4001][0], 25000)

        c.close()

    def testGetRowCount(self):
        self.assertEqual(
            self.source_host.get_row_count("tmp_hashed_pydbcopy_test"), 3)