Example #1
0
 def migrate(self, instance_id=None):
     """Migrate a SINGLE instance via the taskmanager and verify the outcome.

     The instance is checked for RUNNING status with no pending task both
     before and after the "migrate" rpc call, its compute instance id is
     asserted to have changed, and the databases generated before the
     migration are re-checked with ``utils.check_generated_databases``.

     :param instance_id: id of the instance to migrate. When falsy, the id
            is looked up with ``utils.get_instance_id_bytenant`` for the
            SINGLE instance type.
     """
     def assert_running_and_idle():
         # Same status check is needed before and after the migration.
         utils.check_server_status(
             self.inst_id,
             expected_task=utils.tasks.InstanceTasks.NONE,
             type=DBInstanceType.SINGLE,
             expected_svr_status=utils.ServiceStatuses.RUNNING,
             deleted=False)

     self.inst_id = instance_id or utils.get_instance_id_bytenant(DBInstanceType.SINGLE)
     assert_running_and_idle()

     inst_db_info = utils.get_instance(self.inst_id, deleted=False)
     nova_id_before = inst_db_info.compute_instance_id

     # Seed data so its presence can be verified after the migration.
     db_count = 45
     utils.generate_databases(self.inst_id, count=db_count)

     rpc.call(utils.get_context(),
              "taskmanager",
              {"method": "migrate",
               "args": {'migrate_id': inst_db_info.id}})

     assert_running_and_idle()

     migrated = utils.get_instance(self.inst_id, deleted=False)
     nova_id_after = migrated.compute_instance_id
     assert nova_id_before != nova_id_after

     utils.check_generated_databases(self.inst_id, count=db_count)
     self.validate()
Example #2
0
 def _call(self, topic, msg):
     """Send ``msg`` to ``topic`` with ``rpc.call`` and return the reply.

     ``self.context`` is used as the request context. Any exception is
     logged together with its traceback and then re-raised unchanged.

     :param topic: topic handed to ``rpc.call``.
     :param msg: message handed to ``rpc.call``.
     :returns: the value returned by ``rpc.call``.
     """
     try:
         result = rpc.call(self.context, topic, msg)
         # Pass the reply as a logging argument instead of using ``%``
         # directly: ``"..." % result`` raises TypeError when the reply is
         # a tuple whose length is not exactly one.
         LOG.debug("Result is %s", result)
         return result
     except Exception as e:
         LOG.exception(e)
         # Bare ``raise`` keeps the original traceback intact.
         raise
 def _send_msg_with_timeout(self):
     """Request the ``dpkg`` package version over rpc.

     Declares the queue named by ``topic_name()`` on ``self.rabbit``, then
     issues a "version" rpc call on that topic using an admin
     ``TroveContext``.

     :returns: dict whose ``status`` is ``"good"`` and whose ``version`` is
               the value returned by the rpc call.
     """
     self.rabbit.declare_queue(topic_name())
     admin_context = TroveContext(is_admin=True, limit=5, marker=None)
     reply = rpc.call(
         admin_context,
         topic_name(),
         {"method": "version", "args": {"package_name": "dpkg"}},
     )
     return {"status": "good", "version": reply}
Example #4
0
 def _send_msg_with_timeout(self):
     """Send a "version" rpc call for the ``dpkg`` package and wrap the reply.

     The queue for ``topic_name()`` is declared on ``self.rabbit`` before
     the call is made; the call uses an admin ``TroveContext``.

     :returns: ``{"status": "good", "version": <rpc reply>}``.
     """
     self.rabbit.declare_queue(topic_name())

     ctxt = TroveContext(is_admin=True, limit=5, marker=None)
     package_args = {"package_name": "dpkg"}
     version_reply = rpc.call(ctxt, topic_name(),
                              {"method": "version", "args": package_args})

     outcome = {"status": "good"}
     outcome["version"] = version_reply
     return outcome
Example #5
0
    def migrate(self):
        """Migrate the read replica returned by ``self._get_rid()`` and verify it.

        After the "migrate" rpc call the replica is looked up again by its
        virtual instance id; it must keep that virtual id while its own id
        changes. Finally ``self._validate`` is run against the MASTER
        instance id and the replica's virtual id.
        """
        replica_before = utils.get_builtin_instance(self._get_rid())
        old_rr_id = replica_before.id
        rr_vid = replica_before.virtual_instance_id

        migrate_request = {"method": "migrate",
                           "args": {'migrate_id': old_rr_id}}
        rpc.call(utils.get_context(), "taskmanager", migrate_request)

        utils.check_server_status(rr_vid,
                                  DBInstanceType.READ_REPLI,
                                  InstanceTasks.NONE,
                                  utils.ServiceStatuses.RUNNING,
                                  timeout=600)

        replica_after = utils.get_builtin_instance(rr_vid)
        # Same virtual id, different underlying instance id.
        assert (replica_after.virtual_instance_id == rr_vid
                and replica_after.id != old_rr_id), (rr_vid, replica_after.id)

        self._validate(utils.get_instance_id_bytenant(DBInstanceType.MASTER), rr_vid)
Example #6
0
 def force_migrate(self):
     """Migrate a SINGLE instance onto a fixed host and report the result.

     The "migrate" rpc call carries a ``host`` argument; the ``id`` in the
     reply becomes ``self.inst_id`` and is validated. The hypervisor
     hostname of the resulting nova server is then compared with the
     requested host. A mismatch is only logged as an error, it does not
     raise.
     """
     force_host = "rds_zone_1:rds-control-18-220.ksc.com"

     self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.SINGLE)
     context = utils.get_context()
     migrate_args = {'migrate_id': self.inst_id, 'host': force_host}
     reply = rpc.call(context,
                      "taskmanager",
                      {"method": "migrate", "args": migrate_args})

     self.inst_id = reply['id']
     self.validate()

     nova_id = utils.get_instance(self.inst_id).compute_instance_id
     server_info = utils.get_nova_server_info(nova_id)
     actual_host = server_info['OS-EXT-SRV-ATTR:hypervisor_hostname']

     if actual_host != force_host:
         LOG.error("expected host is %s, finally host is %s" % (force_host, actual_host))
Example #7
0
 def _failover(self, stop_mysqld=False, rm_mysql_data=False):
     """Trigger a failover of the read replica and verify it recovers.

     Steps: check the replica is RUNNING, generate databases on
     ``self.master_id``, optionally break the replica, wait for it to
     report SHUTDOWN, send the "failover" rpc call, wait for RUNNING,
     then re-check the generated databases on the replica and validate.

     NOTE(review): ``self.master_id`` is read before this method assigns
     it, so it has to be set by the caller or the class beforehand --
     confirm. The SHUTDOWN check runs even when neither flag is set.

     :param stop_mysqld: when true, call ``utils.stop_mysqld`` on the
            replica's ip before the failover.
     :param rm_mysql_data: when true, call ``utils.mysql_data_lost`` on
            the replica's ip before the failover.
     """
     def wait_for_status(status, timeout):
         # All status checks target the same replica and differ only in
         # the expected service status and the timeout.
         utils.check_server_status(self.inst_id,
                                   expected_task=utils.tasks.InstanceTasks.NONE,
                                   type=DBInstanceType.READ_REPLI,
                                   expected_svr_status=status,
                                   deleted=False, timeout=timeout)

     self.inst_id = self._get_rid()
     wait_for_status(utils.ServiceStatuses.RUNNING, 10)

     self.vip_id = utils.get_vip_id(self.inst_id)
     _rr_server = utils.get_builtin_instance(self.inst_id)
     nova_instance = _rr_server.server
     self.group_id = _rr_server.db_info.group_id

     ran_count = 56
     utils.generate_databases(self.master_id, count=ran_count)

     ip = utils.check_allocate_ip(nova_instance)
     if stop_mysqld:
         utils.stop_mysqld(ip)

     if rm_mysql_data:
         utils.mysql_data_lost(ip)

     wait_for_status(utils.ServiceStatuses.SHUTDOWN, 120)

     rpc.call(utils.get_context(), "taskmanager",
              {"method": "failover",
               "args": {'instance_id': self.inst_id}})

     wait_for_status(utils.ServiceStatuses.RUNNING, 120)

     # Refresh the master id from the group after the failover.
     self.master_id = utils.get_instance_id(self.group_id, DBInstanceType.MASTER)

     utils.check_generated_databases(self.inst_id, count=ran_count)
     self.validate()
Example #8
0
    def call(self, context, msg, topic=None, version=None, timeout=None):
        """Invoke a remote method through rpc.call() and return its reply.

        :param context: The request context.
        :param msg: The message to send; it carries the method and args.
        :param topic: Topic override for this message, resolved through
               ``self._get_topic``.
        :param version: (Optional) API version override, applied to the
               message through ``self._set_version``.
        :param timeout: (Optional) Timeout forwarded to rpc.call() for
               waiting on the response.

        :returns: The return value from the remote method.
        :raises rpc.common.Timeout: when the call times out; re-raised with
                the topic and the method name attached.
        """
        self._set_version(msg, version)
        target_topic = self._get_topic(topic)
        try:
            reply = rpc.call(context, target_topic, msg, timeout)
        except rpc.common.Timeout as timeout_exc:
            # Rebuild the timeout so it names the topic and method involved.
            raise rpc.common.Timeout(
                timeout_exc.info, target_topic, msg.get('method'))
        return reply
Example #9
0
    def call(self, context, msg, topic=None, version=None, timeout=None):
        """Invoke a remote method through rpc.call() with (de)serialization.

        The message args are replaced in place by their serialized form
        before sending, and the reply is passed through
        ``self.serializer.deserialize_entity`` before being returned.

        :param context: The request context.
        :param msg: The message to send; it carries the method and args.
               Its ``args`` entry is overwritten with the serialized args.
        :param topic: Topic override for this message, resolved through
               ``self._get_topic``.
        :param version: (Optional) API version override, applied to the
               message through ``self._set_version``.
        :param timeout: (Optional) Timeout forwarded to rpc.call() for
               waiting on the response.

        :returns: The deserialized return value from the remote method.
        :raises rpc.common.Timeout: when a timeout is raised; re-raised with
                the topic and the method name attached.
        """
        self._set_version(msg, version)
        serialized_args = self._serialize_msg_args(context, msg['args'])
        msg['args'] = serialized_args
        target_topic = self._get_topic(topic)
        try:
            raw_reply = rpc.call(context, target_topic, msg, timeout)
            # Deserialization stays inside the try block on purpose, so a
            # Timeout raised there is translated the same way.
            return self.serializer.deserialize_entity(context, raw_reply)
        except rpc.common.Timeout as timeout_exc:
            raise rpc.common.Timeout(timeout_exc.info, target_topic,
                                     msg.get('method'))
    def _failover_test(self, group_id, trigger_inst_id,
                       do_workload=False, do_prepare=False,
                       mysqld_killed=False,
                       host_rebooted=False,
                       remove_tmp_initsql=False,
                       mysql_data_lost=False,
                       check_vip=False,
                       check_rpl_consist=True,
                       check_binlog_range=False):
        """Run one failover scenario against an instance and verify the result.

        The method records the group and instance state, optionally injects
        a fault, sends the "failover" rpc call to the taskmanager and then
        checks the state afterwards.

        :param group_id: id of the instance group whose items are listed
               before and after the failover.
        :param trigger_inst_id: id of the instance the failover is run for.
        :param do_workload: when true and the instance is a MASTER, run the
               workload helper before the failover.
        :param do_prepare: forwarded to the workload helper.
        :param mysqld_killed: when true, trigger a mysqld crash and wait for
               the instance to report SHUTDOWN.
        :param host_rebooted: when true, trigger a host reboot.
        :param remove_tmp_initsql: when true, trigger removal of the
               temporary sql file on the vm.
        :param mysql_data_lost: when true, trigger loss of the mysql data.
        :param check_vip: not read by this method; the vip/rip check runs
               for MASTER and READ_REPLI instances regardless.
        :param check_rpl_consist: when true, assert the group size is
               unchanged and check the status of the group members.
        :param check_binlog_range: when true, assert that the end of the
               restorable time range advances after the failover, and again
               after a 60 second wait.
        """
        LOG.info("Doing Failover Test, group_id:%s, instance_id:%s, do_workload:%s, do_prepare:%s." %
                 (group_id, trigger_inst_id, do_workload, do_prepare))
        before_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted=False)
        before_items = set(x.type + "_" + x.instance_id for x in before_group_items)

        before_instance = test_utils.get_builtin_instance(trigger_inst_id)
        before_rip = test_utils.check_allocate_ip(before_instance.server)
        before_origin_instid = before_instance.id

        rt_before = None
        if check_binlog_range:
            rt_before = test_utils.get_restorable_time(trigger_inst_id)

        if do_workload and before_instance.type == DBInstanceType.MASTER:
            FAILOVERInstance.__run_workload(do_prepare=do_prepare)

        if remove_tmp_initsql:
            FAILOVERInstance.__trigger_vm_remove_tmp_sql_file(trigger_inst_id)

        if mysqld_killed:
            FAILOVERInstance.__trigger_mysqld_crash(trigger_inst_id)
            test_utils.check_server_status(trigger_inst_id,
                                           expected_task=tasks.InstanceTasks.NONE,
                                           type=before_instance.type,
                                           expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN,
                                           deleted=False, timeout=120)

        if host_rebooted:
            # No SHUTDOWN check here: when the host machine is rebooted no
            # guestagent is left to update the service's status.
            FAILOVERInstance.__trigger_host_reboot(trigger_inst_id)

        if mysql_data_lost:
            FAILOVERInstance.__trigger_mysql_data_lost(trigger_inst_id)

        rpc.call(test_utils.get_context(), "taskmanager",
                 {"method": "failover", "args": {'instance_id': before_origin_instid}},
                 timeout=3600)

        # Check the vip <--> rip mapping: the vip must stay the same while
        # the real ip behind it changes.
        if before_instance.type in (DBInstanceType.MASTER, DBInstanceType.READ_REPLI):
            after_instance = test_utils.get_builtin_instance(trigger_inst_id)
            after_nova_inst = after_instance.server
            after_rip = test_utils.check_allocate_ip(after_nova_inst)
            assert after_instance.vip == before_instance.vip and before_rip != after_rip, \
                (before_instance.vip, after_instance.vip, before_rip, after_rip)

        if before_instance.type == DBInstanceType.MASTER:
            test_utils.check_server_status(before_instance.id,
                                           expected_task=tasks.InstanceTasks.NONE,
                                           type=DBInstanceType.MASTER,
                                           expected_svr_status=test_utils.ServiceStatuses.RUNNING,
                                           deleted=False, timeout=120)

        # Check the replication topology.
        after_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted=False)
        after_items = set(x.type + "_" + x.instance_id for x in after_group_items)
        LOG.info("before " + str(before_items))
        LOG.info("after " + str(after_items))

        if check_rpl_consist:
            # Only the cluster size is asserted; the "type_instance_id"
            # sets above are logged for diagnosis but not compared.
            assert len(before_group_items) == len(after_group_items), "size of mysql cluster should be the same."

            for group_item in after_group_items:
                if group_item.type == DBInstanceType.STANDBY and group_item.instance_id == before_instance.id:
                    # For the triggering instance, now listed as STANDBY,
                    # only the existence of its group item is checked.
                    item = InstanceGroupItem.get_by_instance_id(test_utils.get_context(),
                                                                group_item.instance_id,
                                                                deleted=False)
                    assert item is not None
                    continue

                test_utils.check_server_status(group_item.instance_id,
                                               expected_task=tasks.InstanceTasks.NONE,
                                               type=group_item.type,
                                               expected_svr_status=test_utils.ServiceStatuses.RUNNING,
                                               deleted=False, timeout=120)

        if check_binlog_range:
            rt_after = test_utils.get_restorable_time(trigger_inst_id)
            assert rt_after.end > rt_before.end, (rt_after.end, rt_before.end)
            # Wait and sample again: the range end must keep advancing.
            time.sleep(60)
            rt_after2 = test_utils.get_restorable_time(trigger_inst_id)
            assert rt_after2.end > rt_after.end, (rt_after2.end, rt_after.end)
Example #11
0
 def failover(id):
     """Send a "failover" rpc call to the taskmanager for one instance.

     :param id: instance id, sent as ``instance_id`` in the message args.
     """
     context = utils.get_context()
     request = {"method": "failover", "args": {'instance_id': id}}
     rpc.call(context, "taskmanager", request)
Example #12
0
    def failover(self, instance_id=None, _strategy=None):
        """Stop mysqld on the master or standby, fail over, and verify the group.

        Which node is taken down is decided by ``CONF.ha_failover_strategy``
        ('master' or 'standby'). After the "failover" rpc call the master
        (for the 'master' strategy) and the standby are checked for RUNNING
        status, the generated databases are checked on the master and
        cleared through the standby id, and ``self.validate`` is run.

        :param instance_id: id of the master instance. When falsy, the id is
               looked up with ``utils.get_instance_id_bytenant`` for the
               MASTER type.
        :param _strategy: not read by this method.
        :raises Exception: when the configured strategy is neither 'master'
                nor 'standby'.
        """
        def expect_status(checked_id, checked_type, status):
            # Every status check here shares the idle task and the timeout.
            utils.check_server_status(checked_id,
                                      expected_task=utils.tasks.InstanceTasks.NONE,
                                      type=checked_type,
                                      expected_svr_status=status,
                                      deleted=False, timeout=120)

        self.inst_id = instance_id or utils.get_instance_id_bytenant(DBInstanceType.MASTER)
        master = utils.get_builtin_instance(self.inst_id)
        self.group_id = master.group_id
        self.dbslave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)

        expect_status(self.inst_id, DBInstanceType.MASTER, utils.ServiceStatuses.RUNNING)
        expect_status(self.dbslave_id, DBInstanceType.STANDBY, utils.ServiceStatuses.RUNNING)
        self.vip_id = utils.get_vip_id(self.inst_id)

        strategy = CONF.ha_failover_strategy
        if strategy not in ('master', 'standby'):
            raise Exception("not found ha_failover_strategy %s" % strategy)
        on_master = strategy == 'master'

        # Pick the node that is going to be taken down.
        victim = utils.get_builtin_instance(self.inst_id if on_master else self.dbslave_id)
        nova_instance = victim.server
        victim_id = victim.id
        victim_type = DBInstanceType.MASTER if on_master else DBInstanceType.STANDBY
        virtual_instance_id = victim.virtual_instance_id if on_master else None

        db_count = random.randint(50, 100)
        utils.generate_databases(self.inst_id, count=db_count)

        victim_ip = utils.check_allocate_ip(nova_instance)
        utils.stop_mysqld(victim_ip, stop_ga=True)
        expect_status(victim_id, victim_type, utils.ServiceStatuses.SHUTDOWN)

        rpc.call(utils.get_context(), "taskmanager",
                 {"method": "failover",
                  "args": {'instance_id': victim_id}})

        if on_master:
            # The master is tracked by the origin id resolved from its
            # virtual instance id after the failover.
            origin_inst_id = inst_utils.virtual_instid_2_origin_instid(virtual_instance_id)
            self.inst_id = origin_inst_id
            expect_status(origin_inst_id, DBInstanceType.MASTER, utils.ServiceStatuses.RUNNING)

        standby_after = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)
        expect_status(standby_after, DBInstanceType.STANDBY, utils.ServiceStatuses.RUNNING)
        self.dbslave_id = standby_after

        utils.check_generated_databases(self.inst_id, count=db_count)
        utils.clear_generated_databases(self.dbslave_id, count=db_count)
        self.validate()
Example #13
0
 def migrate(self, instance_id=None, _strategy=None):
     """Migrate the master or the standby of a group and verify the result.

     Which node is migrated is decided by ``CONF.migrate_strategy``
     ('master' or 'standby'). Databases are generated on the master before
     the "migrate" rpc call and checked on both master and standby
     afterwards, then cleared through the master and ``self.validate`` is
     run.

     :param instance_id: id of the master instance. When falsy, the id is
            looked up with ``utils.get_instance_id_bytenant`` for the MASTER
            type.
     :param _strategy: not read by this method; the strategy always comes
            from ``CONF.migrate_strategy``.
     :raises Exception: when the configured strategy is neither 'master' nor
             'standby'.
     """
     self.inst_id = instance_id
     if not self.inst_id:
         self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
     _master_inst = utils.get_instance(id=self.inst_id, deleted=0)
     self.group_id = _master_inst.group_id
     self.dbslave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)

     utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.MASTER,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=120)

     utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.STANDBY,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=120)

     self.vip_id = utils.get_vip_id(_master_inst.id)

     virtual_instance_id = None
     strategy = CONF.migrate_strategy
     if strategy == 'master':
         _builtin = utils.get_builtin_instance(_master_inst.id)
         nova_instance = _builtin.server
         migrate_id = _master_inst.id
         virtual_instance_id = _builtin.virtual_instance_id
     elif strategy == 'standby':
         _builtin = utils.get_builtin_instance(self.dbslave_id)
         nova_instance = _builtin.server
         migrate_id = self.dbslave_id
     else:
         raise Exception("not found migrate_strategy %s" % strategy)

     ran_count = random.randint(50, 100)
     utils.generate_databases(self.inst_id, count=ran_count)

     _ret = rpc.call(utils.get_context(), "taskmanager",
                     {"method": "migrate",
                      "args": {'migrate_id': migrate_id}})

     if strategy == 'master':
         # The master is tracked by the origin id resolved from its virtual
         # instance id after the migration.
         raw_instance_id = inst_utils.virtual_instid_2_origin_instid(virtual_instance_id)
         new_server_id = utils.get_builtin_instance(raw_instance_id).server_id
         utils.check_server_status(raw_instance_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                   type=DBInstanceType.MASTER,
                                   expected_svr_status=utils.ServiceStatuses.RUNNING,
                                   deleted=False, timeout=120)
         # The migrated master must run on a different nova server.
         assert new_server_id != nova_instance.id, (new_server_id, nova_instance.id)
         self.inst_id = raw_instance_id

     elif strategy == "standby":
         # The rpc reply carries the id of the standby after migration.
         new_standby_id = _ret['id']
         utils.check_server_status(new_standby_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                   type=DBInstanceType.STANDBY,
                                   expected_svr_status=utils.ServiceStatuses.RUNNING,
                                   deleted=False, timeout=123)
         self.dbslave_id = new_standby_id

     utils.check_generated_databases(self.inst_id, count=ran_count)
     utils.check_generated_databases(self.dbslave_id, count=ran_count)
     utils.clear_generated_databases(self.inst_id, count=ran_count)
     self.validate()