Example #1
0
 def restorable_time_failover_twice(self):
     """Fail the master over twice and check the restorable time range.

     The range reported for the master's virtual instance id is read before
     the first failover, after each failover, and once more after a final
     60 second pause; each reading is asserted against the previous one.
     """

     def trigger_failover(instance_id):
         # Send a "failover" request for one instance to the taskmanager.
         rpc.call(utils.get_context(), "taskmanager",
                  {"method": "failover",
                   "args": {'instance_id': instance_id}})

     self.create()
     first_master = utils.get_builtin_instance(self.inst_id)
     master_vid = first_master.virtual_instance_id
     rt_1 = utils.get_restorable_time(master_vid)

     # case1: failover only
     # Timestamp taken just before the first failover is requested;
     # rt_3.begin is compared against it further down.
     failover1_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
     trigger_failover(first_master.id)

     # The virtual id is checked as MASTER, and the instance that was just
     # failed over is checked as STANDBY.
     utils.check_server_status(master_vid, DBInstanceType.MASTER, timeout=360)
     utils.check_server_status(first_master.id, DBInstanceType.STANDBY, timeout=360)

     rt_2 = utils.get_restorable_time(master_vid)
     assert rt_2.end > rt_1.end and rt_1.begin <= rt_2.begin, \
         ("before first failover rt: [%s, %s], after first failover rt: [%s, %s]"
          % (rt_1.begin, rt_1.end, rt_2.begin, rt_2.end))

     # case2: second failover, requested on the instance currently resolved
     # from the virtual id.
     second_master = utils.get_builtin_instance(master_vid)
     trigger_failover(second_master.id)
     utils.check_server_status(master_vid, DBInstanceType.MASTER, timeout=360)
     utils.check_server_status(second_master.id, DBInstanceType.STANDBY, timeout=360)

     rt_3 = utils.get_restorable_time(master_vid)
     assert rt_3.begin > failover1_time and rt_3.end > rt_2.end, \
         (rt_3.begin, failover1_time, rt_3.end, rt_2.end)

     # Dump every range seen so far for diagnosis.
     for window in (rt_1, rt_2, rt_3):
         print("%s %s" % (window.begin, window.end))
     print("the first failover time: %s" % (failover1_time))

     # The end of the range must keep advancing after the failovers.
     time.sleep(60)
     rt_4 = utils.get_restorable_time(master_vid)
     print("%s %s" % (rt_4.begin, rt_4.end))
     assert rt_4.end > rt_3.end
Example #2
0
    def restore_to_point_in_time(self):
        """Restore to a point in time, before and after deleting the source.

        Databases are generated, a timestamp (``time1``) is taken, and the
        databases are cleared again. Once the reported restorable end time
        has passed the clearing, a restore to ``time1`` is requested and the
        restored instance is checked for the generated databases and then
        deleted. The original instance is deleted and the same restore is
        repeated.

        Raises:
            AssertionError: if the restorable end time does not pass the
                clearing timestamp within ``RESTORE_TIME_OUT``.
        """

        def wait_until_restorable_after(moment):
            # Poll every 3 seconds until the restorable end time is later
            # than `moment`. The wait is bounded so that a stalled backup
            # fails the test instead of hanging it.
            # NOTE(review): assumes RESTORE_TIME_OUT is a number of seconds,
            # as its use as a `timeout=` argument suggests - confirm.
            deadline = time.time() + RESTORE_TIME_OUT
            while True:
                restorable_time = utils.get_restorable_time(self.inst_id)
                end = datetime.datetime.strptime(restorable_time.end,
                                                 '%Y-%m-%d %H:%M:%S')
                if end > moment:
                    return
                if time.time() >= deadline:
                    raise AssertionError(
                        "restorable end time %s did not pass %s within %s seconds"
                        % (restorable_time.end, moment, RESTORE_TIME_OUT))
                time.sleep(3)

        def restore_verify_and_delete():
            # Restore to time1, check the restored instance holds the
            # generated databases, then delete it and check the deletion.
            utils.restore_to_point_in_time_byclient(master_vid, time1)
            time.sleep(10)  # wait for the compute instance to appear
            restored = utils.get_restore_instance()
            utils.check_server_status(restored, DBInstanceType.SINGLE,
                                      timeout=RESTORE_TIME_OUT)
            utils.check_generated_databases(restored, count=db_count)
            utils.delete_rds_byclient(restored)
            utils.check_server_status(restored,
                                      expected_task=utils.tasks.InstanceTasks.NONE,
                                      type=DBInstanceType.SINGLE,
                                      expected_svr_status=utils.ServiceStatuses.DELETED,
                                      deleted=True,
                                      timeout=CONF.trove_delete_timeout)

        self.create()
        old_master_inst = utils.get_builtin_instance(self.inst_id)
        master_vid = old_master_inst.virtual_instance_id

        db_count = 100
        utils.generate_databases(self.inst_id, count=db_count)
        utils.check_generated_databases(self.inst_id, count=db_count)
        time.sleep(3)
        # time1 falls between generating and clearing the databases, so a
        # restore to it is expected to contain all of them.
        time1 = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        utils.clear_generated_databases(self.inst_id, count=db_count)
        time.sleep(3)
        dt2 = datetime.datetime.now()

        wait_until_restorable_after(dt2)

        # restore while the original instance still exists
        restore_verify_and_delete()

        # delete original instance
        utils.delete_rds_byclient(master_vid)
        utils.check_server_status(self.inst_id,
                                  expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.MASTER,
                                  expected_svr_status=utils.ServiceStatuses.DELETED,
                                  deleted=True,
                                  timeout=CONF.trove_delete_timeout)

        # restore to point in time after deleting
        restore_verify_and_delete()
Example #3
0
    def override_to_point_in_time(self):
        """Override the instance with two different points in time.

        Databases are generated, ``time1`` is taken, the databases are
        cleared, and ``time2`` is taken. After the reported restorable end
        time has passed ``time2``, the instance is overridden to ``time1``
        (the generated databases are expected back) and then to ``time2``
        (no generated databases are expected). Finally the master and
        standby ids are re-read from the group and ``self.validate()`` runs.

        Raises:
            AssertionError: if the restorable end time does not pass
                ``time2`` within ``RESTORE_TIME_OUT``.
        """
        self.create()
        old_master_inst = utils.get_builtin_instance(self.inst_id)
        master_vid = old_master_inst.virtual_instance_id

        db_count = 100
        utils.generate_databases(self.inst_id, count=db_count)
        utils.check_generated_databases(self.inst_id, count=db_count)
        time.sleep(3)
        time1 = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        utils.clear_generated_databases(self.inst_id, count=db_count)
        time.sleep(3)
        dt2 = datetime.datetime.now()
        time2 = dt2.strftime('%Y-%m-%d %H:%M:%S')

        # Poll every 3 seconds until the restorable end time is later than
        # dt2. The wait is bounded so that a stalled backup fails the test
        # instead of hanging it.
        # NOTE(review): assumes RESTORE_TIME_OUT is a number of seconds, as
        # its use as a `timeout=` argument suggests - confirm.
        deadline = time.time() + RESTORE_TIME_OUT
        while True:
            restorable_time = utils.get_restorable_time(self.inst_id)
            dt = datetime.datetime.strptime(restorable_time.end, '%Y-%m-%d %H:%M:%S')
            if dt > dt2:
                break
            if time.time() >= deadline:
                raise AssertionError(
                    "restorable end time %s did not pass %s within %s seconds"
                    % (restorable_time.end, time2, RESTORE_TIME_OUT))
            time.sleep(3)

        # First override: back to time1, taken before the databases were cleared.
        utils.override_with_backup_byclient(master_vid, None, time1)
        utils.check_server_deleted(old_master_inst.id, DBInstanceType.PENDING,
                                   timeout=RESTORE_TIME_OUT)
        # The virtual id is resolved again and the result is tracked as the
        # current instance from here on.
        new_master_inst1 = utils.get_builtin_instance(master_vid)
        self.inst_id = new_master_inst1.id
        utils.check_generated_databases(self.inst_id, count=db_count)

        time.sleep(3)

        # Second override: to time2, taken after the databases were cleared.
        utils.override_with_backup_byclient(master_vid, None, time2)
        utils.check_server_deleted(self.inst_id, DBInstanceType.PENDING,
                                   timeout=RESTORE_TIME_OUT)
        utils.check_generated_databases(master_vid, count=0)

        # Re-read the current master/standby ids before the final validation.
        self.inst_id = utils.get_instance_id(self.group_id, DBInstanceType.MASTER)
        self.dbslave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)
        self.validate()
    def _failover_test(self, group_id, trigger_inst_id,
                       do_workload=False, do_prepare=False,
                       mysqld_killed=False,
                       host_rebooted=False,
                       remove_tmp_initsql=False,
                       mysql_data_lost=False,
                       check_vip=False,
                       check_rpl_consist=True,
                       check_binlog_range=False):
        """Inject optional faults, request a failover and verify the group.

        Args:
            group_id: id of the instance group whose items are listed before
                and after the failover.
            trigger_inst_id: id used to look up the instance that is failed
                over and to trigger the optional faults.
            do_workload: run the workload first, but only when the instance
                type is MASTER.
            do_prepare: forwarded to the workload runner.
            mysqld_killed: trigger a mysqld crash and check for the SHUTDOWN
                service status before failing over.
            host_rebooted: trigger a host reboot before failing over.
            remove_tmp_initsql: trigger removal of the temporary sql file on
                the VM before failing over.
            mysql_data_lost: trigger mysql data loss before failing over.
            check_vip: accepted for callers, but not read by this method.
            check_rpl_consist: compare the group size before/after and check
                the status of the group's instances.
            check_binlog_range: assert that the restorable end time advances
                across the failover and again after a 60 second pause.

        Raises:
            AssertionError: when any of the checks above fails.
        """
        LOG.info("Doing Failover Test, group_id:%s, instance_id:%s, do_workload:%s, do_prepare:%s." %
                 (group_id, trigger_inst_id, do_workload, do_prepare))
        before_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted=False)
        before_items = set(x.type + "_" + x.instance_id for x in before_group_items)

        before_instance = test_utils.get_builtin_instance(trigger_inst_id)
        before_rip = test_utils.check_allocate_ip(before_instance.server)
        before_origin_instid = before_instance.id

        rt_before = None
        if check_binlog_range:
            rt_before = test_utils.get_restorable_time(trigger_inst_id)

        if do_workload and before_instance.type == DBInstanceType.MASTER:
            FAILOVERInstance.__run_workload(do_prepare=do_prepare)

        if remove_tmp_initsql:
            FAILOVERInstance.__trigger_vm_remove_tmp_sql_file(trigger_inst_id)

        if mysqld_killed:
            FAILOVERInstance.__trigger_mysqld_crash(trigger_inst_id)
            test_utils.check_server_status(trigger_inst_id,
                                           expected_task=tasks.InstanceTasks.NONE,
                                           type=before_instance.type,
                                           expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN,
                                           deleted=False, timeout=120)

        if host_rebooted:
            FAILOVERInstance.__trigger_host_reboot(trigger_inst_id)
            # No SHUTDOWN status check here: when the host machine is
            # rebooted, no guestagent updates the service's status.

        if mysql_data_lost:
            FAILOVERInstance.__trigger_mysql_data_lost(trigger_inst_id)

        rpc.call(test_utils.get_context(), "taskmanager",
                 {"method": "failover", "args": {'instance_id': before_origin_instid}},
                 timeout=3600)

        ## check vip <--> rip mapping.
        ## vip should be changed in 10 seconds.
        if before_instance.type in (DBInstanceType.MASTER, DBInstanceType.READ_REPLI):
            after_instance = test_utils.get_builtin_instance(trigger_inst_id)
            after_nova_inst = after_instance.server
            after_rip = test_utils.check_allocate_ip(after_nova_inst)
            # The vip must be unchanged while the allocated ip behind it differs.
            assert after_instance.vip == before_instance.vip and before_rip != after_rip, \
                (before_instance.vip, after_instance.vip, before_rip, after_rip)

        if before_instance.type == DBInstanceType.MASTER:
            test_utils.check_server_status(before_instance.id,
                                           expected_task=tasks.InstanceTasks.NONE,
                                           type=DBInstanceType.MASTER,
                                           expected_svr_status=test_utils.ServiceStatuses.RUNNING,
                                           deleted=False, timeout=120)

        ## check replication topo
        after_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted=False)
        after_items = set(x.type + "_" + x.instance_id for x in after_group_items)
        LOG.info("before " + str(before_items))
        LOG.info("after " + str(after_items))

        if check_rpl_consist:
            # Only the group size is asserted; the type/instance-id sets are
            # logged above for diagnosis.
            assert len(before_group_items) == len(after_group_items), "size of mysql cluster should be the same."

            for group_item in after_group_items:
                if group_item.type == DBInstanceType.STANDBY and group_item.instance_id == before_instance.id:
                    # A STANDBY item carrying the failed-over instance's id
                    # only needs to exist; its service status is not checked.
                    item = InstanceGroupItem.get_by_instance_id(test_utils.get_context(),
                                                                group_item.instance_id,
                                                                deleted=False)
                    assert item is not None
                    continue

                test_utils.check_server_status(group_item.instance_id,
                                               expected_task=tasks.InstanceTasks.NONE,
                                               type=group_item.type,
                                               expected_svr_status=test_utils.ServiceStatuses.RUNNING,
                                               deleted=False, timeout=120)

        if check_binlog_range:
            rt_after = test_utils.get_restorable_time(trigger_inst_id)
            assert rt_after.end > rt_before.end, (rt_after.end, rt_before.end)
            # The end of the range must keep advancing after the failover.
            time.sleep(60)
            rt_after2 = test_utils.get_restorable_time(trigger_inst_id)
            assert rt_after2.end > rt_after.end, (rt_after2.end, rt_after.end)
Example #5
0
 def _get_restorable_time(sleep=None):
     """Return the restorable time for ``master_vid``, optionally after a pause.

     Args:
         sleep: value passed to ``time.sleep`` before the lookup; when None
             no pause is made.

     Returns:
         Whatever ``utils.get_restorable_time(master_vid)`` returns.
     """
     # NOTE(review): master_vid is not defined in this function; it is
     # presumably supplied by an enclosing scope - confirm.
     if sleep is not None:
         time.sleep(sleep)

     return utils.get_restorable_time(master_vid)