def TaskOb(task_name):
    """Run the task stored under ``task_name`` and report the outcome.

    The task content is read from ZooKeeper and evaluated into a sequence
    whose members select the handler:

    * ``'add'``    -> ``TaskClassify().TaskChange(value[0])``
    * ``'white'``  -> ``TaskClassify().TaskWhite(value)`` (whitelist operation)
    * ``'down'``   -> ``TaskClassify().TaskDown(value[0])``, unless
      ``value[0]`` is whitelisted, in which case the outage is only logged
    * ``'append'`` -> ``AdditionTask.Addition().ChangeRepl(value)``
      (additional task)

    Returns:
        The handler's result, ``True`` for a whitelisted outage, or ``False``
        when the content is missing or names no known task type.
    """
    with closing(zkHander()) as zkhander:
        raw_content = zkhander.GetTaskContent(task_name)

    # NOTE(security): eval() executes whatever expression is stored in the
    # task node. Kept for compatibility with the existing task format;
    # TODO(review) consider ast.literal_eval if tasks are always literals.
    _task_value = eval(raw_content) if raw_content is not None else None

    if _task_value is not None:
        if 'add' in _task_value:
            return TaskClassify().TaskChange(_task_value[0])
        if 'white' in _task_value:
            # Whitelist operation.
            return TaskClassify().TaskWhite(_task_value)
        if 'down' in _task_value:
            # An outage must first be checked against the whitelist.
            with closing(zkHander()) as zkhander:
                if zkhander.GetWhite(_task_value[0]):
                    Logging(msg='this master {} has been down,but it in whitelist!!'.format(_task_value[0]),level='info')
                    return True
                return TaskClassify().TaskDown(_task_value[0])
        if 'append' in _task_value:
            # Additional task.
            from db_handle import AdditionTask
            return AdditionTask.Addition().ChangeRepl(_task_value)

    # Missing content or an unknown task type: always report an explicit
    # failure instead of silently returning None.
    Logging(msg='task failed state: type error',level='error')
    return False
def ChangeRepl(self, _content):
    """Process an additional-replication task.

    Args:
        _content: sequence whose first item is the group name, second item
            is the region and last item is the task type.

    For type ``'dow'`` the region master is probed three times, one second
    apart. If the last probe succeeds the master watch is re-created,
    otherwise a new master is selected and that result is returned. For
    type ``'up'`` only the watch is created.

    NOTE(review): the literal is ``'dow'`` rather than ``'down'``;
    presumably deliberate so it does not match a generic ``'down'`` task
    test -- confirm against the task producer.

    Returns:
        ``True`` on success, the result of the master change when one was
        needed, or ``False`` if any step raised.
    """
    try:
        groupname, region, task_type = _content[0], _content[1], _content[-1]
        if task_type == 'dow':
            # Outage task: a node may need to be re-selected and watched.
            for _ in range(0, 3):
                host, port = self.__get_master_for_region(region, groupname)
                with closing(dbHandle(host, port)) as dbhandle:
                    mysqlstate = dbhandle.RetryConn()  # can we still connect?
                time.sleep(1)
            if mysqlstate:
                # Still reachable: re-create the master watch.
                zkHander().CreateWatch(
                    host=host.replace('.', '-'),
                    addition=True,
                    region=region,
                    region_for_groupname=groupname)
            else:
                return self.__change_new_master(region=region,
                                                groupname=groupname)
        elif task_type == 'up':
            # Watch only; used when a sync task was added manually.
            self.__up_watch_master(region=region, groupname=groupname)
        return True
    except Exception:
        # Exception (not a bare except) so SystemExit/KeyboardInterrupt
        # still propagate; the traceback is logged so the cause is kept.
        import traceback
        Logging(msg='addition task failed!', level='error')
        Logging(msg=traceback.format_exc(), level='error')
        return False
def SetMaster(self, groupname, host, onlywatch=None):
    """Queue a 'white' task that points ``groupname`` at ``host``.

    The task value is ``[groupname, <host with '.' replaced by '-'>,
    'white']`` with ``'onlywatch'`` appended when ``onlywatch`` is truthy.
    The group's whitelist entry is deleted before the sequential task node
    is created. Returns whatever ``zk.create`` returns.
    """
    task_value = [groupname, host.replace('.', '-'), 'white']
    if onlywatch:
        task_value.append('onlywatch')

    task_root = GetConf().GetTaskPath()
    zkHander().DeleteWhite(groupname)
    return zk.create(path=task_root + '/task',
                     sequence=True,
                     value=str(task_value))
def Run():
    """Entry point of the slave monitor; never returns.

    Handles slaves already recorded as down, registers ``ManageDownNode`` as
    the children-watch callback on the slave-down path, then checks slave
    state every 3 seconds over a single connection.
    """
    # Process any outage records that already exist.
    SlaveDownCheck()

    slave_down_path = GetConf().GetSlaveDown()
    zkHander().CreateChildrenWatch(path=slave_down_path, func=ManageDownNode)

    with closing(zkHander()) as zk_conn:
        while True:
            checker = SlaveCheck(zk_conn)
            checker.WhileCheckSLave()
            # Scan the slaves' online state once every 3 seconds.
            time.sleep(3)
def TaskCheck(self):
    """Run every pending task, then watch the task path for new ones.

    The current task list is read once; each entry is passed to
    ``self.TaskFunc``. A children watch with ``self.TaskFunc`` as callback
    is registered afterwards whether or not any task was pending.
    """
    task_path = GetConf().GetTaskPath()
    with closing(zkHander()) as zk_conn:
        pending = zk_conn.GetTaskList()

    # An empty or missing list simply means there is nothing to replay.
    for task in (pending or ()):
        self.TaskFunc(task)

    zkHander().CreateChildrenWatch(task_path, self.TaskFunc)
def TaskWhite(self, _task_value):
    """Apply a manual cluster operation.

    Args:
        _task_value: sequence whose first item is the group name and second
            item is the master host; may also contain ``'onlywatch'``.

    Creates a watch on the master, points the group at it, removes the
    group's unprocessed outage record and updates the haproxy metadata.
    Routers are notified unless ``'onlywatch'`` is present. Always returns
    ``True``.
    """
    group_name, master_host = _task_value[0], _task_value[1]

    # Watch the master node.
    zkHander().CreateWatch(master_host)

    with closing(zkHander()) as zk_conn:
        zk_conn.SetMasterHost(group_name, master_host)  # repoint the group's master
        zk_conn.DeleteWatchDown(group_name)  # drop the unprocessed outage record
        zk_conn.SetHaproxyMeta(group_name, None, master_host)  # update haproxy config

    notify_routers = 'onlywatch' not in _task_value
    if notify_routers:
        SendRoute(group_name)
    return True
def __get_master_for_region(self, region, groupname):
    """Return ``(host, port)`` of the master recorded for ``groupname``/``region``.

    The node ``<addition RPL path>/<groupname>/<region>`` is read and
    evaluated; its ``'host'`` entry is returned as stored and its ``'port'``
    entry is converted with ``int``.
    """
    with closing(zkHander()) as zk_conn:
        node_path = '/'.join([GetConf().GetAdditionRPL(), groupname, region])
        master_info = eval(zk_conn.Get(node_path))
        return master_info['host'], int(master_info['port'])
def __set_group_region(self, region, host_content):
    """Record ``host_content`` as the master this host's group uses for ``region``.

    The group name is looked up from this host's metadata. The value is
    written to ``<addition RPL path>/<group>/<region>``, an addition watch
    is created on the new master, and the outage record named
    ``<group>_<region>`` is deleted.
    """
    with closing(zkHander()) as zk_conn:
        own_node = self.host.replace('.', '-')
        own_meta = eval(zk_conn.GetMeta(name=own_node, type='host'))
        groupname = own_meta['group']
        region_node = '/'.join([GetConf().GetAdditionRPL(), groupname, region])
        zk_conn.Set(path=region_node, value=str(host_content))

    master_node = host_content['host'].replace('.', '-')
    zkHander().CreateWatch(host=master_node,
                           addition=True,
                           region=region,
                           region_for_groupname=groupname)

    with closing(zkHander()) as zk_conn:
        zk_conn.DeleteWatchDown(groupname=groupname + '_' + region)
def __get_online_host(self, region):
    """Pick an online host of ``region``.

    The region node holds a mapping such as
    ``{'192-168-212-1': {'port': 333, 'ssl': 0/1}, ...}``. Every key is
    checked with ``GetOnlineState`` and the first online one is returned as
    ``{'host': <dotted address>, 'port': ..., 'ssl': ...}``.

    Returns ``None`` (after logging a warning) when the region has no value
    or none of its hosts is online.
    """
    region_path = GetConf().GetAdditionRegion() + '/' + region
    with closing(zkHander()) as zk_conn:
        candidates = eval(zk_conn.Get(region_path))
        if not candidates:
            Logging(
                msg='This group has replication task ,But not region value',
                level='warning')
            return None

        online = [name for name in candidates
                  if zk_conn.GetOnlineState(name)]
        if not online:
            Logging(
                msg='This group has replication task ,But all region not online',
                level='warning')
            return None

        chosen = online[0]
        return {
            'host': chosen.replace('-', '.'),
            'port': candidates[chosen]['port'],
            'ssl': candidates[chosen]['ssl'],
        }
def TaskFunc(self, taskname):
    """Execute ``taskname`` if this server obtains its lock.

    When the lock cannot be taken the task is assumed to be running
    elsewhere and a lock watch is created instead. Otherwise the task is run
    through ``TaskOb``; it is deleted on success and logged on failure. In
    both cases the lock is removed after a 3 second pause.
    """
    # Check whether another server is already executing this task.
    with closing(zkHander()) as lock_conn:
        got_lock = lock_conn.SetLockTask(taskname)

    if not got_lock:
        Logging(msg='task : {} elsewhere in the execution'.format(taskname),level='info')
        zkHander().CreateLockWatch(taskname)
        return

    with closing(zkHander()) as zk_conn:
        if TaskOb(taskname):
            # Remove the task that has just been executed.
            zk_conn.DeleteTask(taskname)
        else:
            stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            Logging(msg=' {} : this task {} failed'.format(stamp,taskname),level='error')

        Logging(msg='sleep time 3S ,waiting other server create watch',level='info')
        time.sleep(3)
        zk_conn.DeleteLockTask(taskname)
def Region_meta(self, region):
    """Publish the hosts that can serve replication for ``region``.

    ``self.host_content`` is copied with every key's ``'.'`` replaced by
    ``'-'`` and its string form is stored at
    ``<addition region path>/<region>``.
    """
    dashed_hosts = {
        name.replace('.', '-'): self.host_content[name]
        for name in self.host_content
    }
    with closing(zkHander()) as zk_conn:
        region_node = GetConf().GetAdditionRegion() + '/' + region
        zk_conn.Create(path=region_node,
                       value=str(dashed_hosts),
                       seq=False,
                       mp=True)
def ResetMaster(self,groupname):
    """Turn the local MySQL instance into the master of ``groupname``.

    Steps, in order:
      1. Read the current binlog file/position and master host via
         ``self.CheckPos(get_host=True)``.
      2. If ``CheckOnlineClient(master_host)`` is truthy, request the missing
         data through ``Append(...).receive(...)``.
      3. If a master host is known, store the executed GTID in ZooKeeper and,
         when the append did not succeed, the read-binlog status as well.
      4. Run ``stop slave`` and ``reset slave all;`` and apply the master
         variables.

    A ``MySQLdb.Warning`` raised anywhere above is logged, after which
    ``reset slave all;`` and the master variables are applied again.
    """
    try:
        '''获取当前binlog读取位置'''
        # Get the binlog position currently being read.
        append_stat=None
        master_log_file,read_master_log_pos,master_host = self.CheckPos(get_host=True)
        '''================'''
        # Used when MySQL is down but the server is online: append the data.
        from zk_handle.zkHandler import zkHander
        from Append.AppendValue import Append
        from lib.get_conf import GetConf
        from contextlib import closing
        with closing(zkHander()) as zkhander:
            client_stat = zkhander.CheckOnlineClient(master_host)
        if client_stat:
            # Request payload: binlog file and start position to fetch from.
            __get_content = {'getbinlog': 10010, 'binlog_file': master_log_file, 'start_position': read_master_log_pos}
            Logging(msg='gets the unsynchronized data not. info:{}'.format(__get_content),level='info')
            append_stat = Append(connection=self.local_conn,cursor=self.mysql_cur,host=master_host,port=GetConf().GetClientPort()).receive(conn_info=str(__get_content))
            if append_stat:
                Logging(msg='Append OK',level='info')
            else:
                Logging(msg='Append Failed',level='error')
        '''================='''
        if master_host:
            readbinlog_status = str([groupname,master_log_file,read_master_log_pos])
            execute_gtid = str([groupname,self.__CetGtid()])
            with closing(zkHander()) as zkhander:
                if append_stat:
                    # Append succeeded: only the executed GTID is recorded.
                    zkhander.SetExecuteGtid(master_host, execute_gtid)
                else:
                    # No append: record the read position and the GTID.
                    zkhander.SetReadBinlog(master_host,readbinlog_status)
                    zkhander.SetExecuteGtid(master_host,execute_gtid)
        ''''''
        #self.mysql_cur.execute('set global read_only=0;')
        self.mysql_cur.execute('stop slave')
        self.mysql_cur.execute('reset slave all;')
        self.__set_variables(type='master')
    except MySQLdb.Warning,e:
        # Log the warning, then still clear the slave state and apply the
        # master variables.
        Logging(msg=traceback.format_exc(),level='warning')
        self.mysql_cur.execute('reset slave all;')
        self.__set_variables(type='master')
def SendRoute(group_name, slavedown=None):
    """Send ``group_name`` to every router registered for the group.

    The router list is a comma-separated string read from ZooKeeper; each
    entry is passed to ``TcpClient`` and ``Send(group_name)`` is called on
    it. Exceptions while sending are logged and count as a failed send.

    Args:
        group_name: group whose routing configuration changed.
        slavedown: when truthy, stop at the first failed send and return
            ``False``; otherwise a failed send marks the group's outage
            record as ``'failed'`` and the remaining routers are still tried.

    Returns:
        ``False`` on the first failure when ``slavedown`` is truthy,
        otherwise ``None``.
        NOTE(review): success also yields ``None``, so callers testing the
        result for truthiness see it as falsy -- confirm this is intended.
    """
    with closing(zkHander()) as zkhander:
        # Router addresses that must receive the change notification.
        route_content = zkhander.GetRouter(group_name)

    if route_content:
        for _content in route_content.split(','):
            # Reset per router: without this, an exception below left the
            # name unbound on the first router (NameError) or carrying the
            # previous router's status.
            send_stat = None
            try:
                with closing(TcpClient(_content)) as tcpclient:
                    send_stat = tcpclient.Send(group_name)
            except Exception:
                Logging(msg=traceback.format_exc(), level='error')

            if not send_stat:
                if slavedown:
                    return False
                else:
                    with closing(zkHander()) as zkhander:
                        zkhander.SetWatchDown(group_name, 'failed')
def AddRoute(groupname, route):
    """Add a route entry for ``groupname``.

    Creates ``<router path>/<groupname>`` with ``route`` as its value, or
    prints a notice when that node already exists.
    """
    route_node = GetConf().GetRouter() + '/' + groupname
    with closing(zkHander()) as zk_conn:
        if zk_conn.Exists(route_node) is not None:
            print('this route already exists')
        else:
            zk_conn.Create(path=route_node, value=route, seq=False)
def __change_new_master(self, region, groupname):
    """Switch ``groupname``'s replication source for ``region`` to an online host.

    An online host of the region is selected, the group's current master and
    its port are read from the metadata, and the change is delegated to
    ``ExecuteAdditionTask(...).Change``. Returns that call's result.
    """
    with closing(zkHander()) as zk_conn:
        replacement = self.__get_online_host(region)
        cur_master = zk_conn.GetMasterMeta(groupname)
        master_meta = zk_conn.GetMeta(type='host', name=cur_master)
        master_port = eval(master_meta)['port']

    executor = ExecuteAdditionTask(host=cur_master.replace('-', '.'),
                                   port=int(master_port))
    return executor.Change(region, replacement)
def AddMeta(groupname, hosts):
    """Add node metadata to an existing cluster.

    Args:
        groupname: name of the existing group.
        hosts: a single ``'host:port'`` string.

    The host (with ``'.'`` replaced by ``'-'``) is appended to the group's
    comma-separated host list, and a host node holding
    ``{'group': groupname, 'port': port}`` is created.

    Raises:
        IndexError: if ``hosts`` has no ``':port'`` part. This is now raised
            before anything is written, so a malformed argument no longer
            leaves the group list updated without a matching host node.
    """
    _hosts = hosts.split(':')
    # Resolve both fields up front so bad input fails before any write.
    host_node, port = _hosts[0].replace('.', '-'), _hosts[1]

    group_path = GetConf().GetMetaGroup()
    host_path = GetConf().GetMetaHost()
    with closing(zkHander()) as zkhander:
        group_hosts = zkhander.GetMeta(type='group', name=groupname)
        zkhander.Set(path=group_path + '/' + groupname,
                     value=group_hosts + ',' + host_node)
        value = {'group': groupname, 'port': port}
        zkhander.Create(path=host_path + '/' + host_node,
                        value=str(value),
                        seq=False)
def TaskDown(self, groupname):
    """Handle a master-outage task for ``groupname``.

    The current master is probed three times, one second apart, re-reading
    its port from the metadata each time; only the last probe decides. If
    MySQL is reachable the master watch is re-created and ``True`` is
    returned. Otherwise the outage is logged and the result of
    ``self.TaskChange(groupname, type='change')`` is returned.
    """
    with closing(zkHander()) as zk_conn:
        cur_master = zk_conn.GetMasterMeta(groupname)

    for _ in range(0, 3):
        with closing(zkHander()) as zk_conn:
            master_meta = zk_conn.GetMeta(type='host', name=cur_master)
            master_port = eval(master_meta)['port']
        with closing(dbHandle(cur_master.replace('-','.'), master_port)) as dbhandle:
            mysqlstate = dbhandle.RetryConn()  # check MySQL is connectable
        time.sleep(1)

    if mysqlstate:
        # Master is reachable: re-create its watch.
        zkHander().CreateWatch(cur_master)
        return True

    # Master is down: elect a new one.
    now_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
    Logging(msg=' {} : group {} the current master {} state: down'.format(now_time,groupname,cur_master),level='error')
    return self.TaskChange(groupname,type='change')
def SetHa(groupname, host):
    """Put a recovered node back into the haproxy read list of ``groupname``.

    The group's haproxy value and the host's port are read from ZooKeeper.
    If ``'<host>:<port>'`` is not yet in the read list it is appended, the
    haproxy metadata is rewritten, the routers are notified and ``"OK"`` is
    returned. If it is already listed a notice is printed and ``None`` is
    returned.

    All ZooKeeper access now goes through one connection that is closed on
    exit; previously three ``zkHander()`` instances were created and never
    closed, although no watch is registered here.
    """
    from contextlib import closing  # local import keeps the block self-contained

    path = GetConf().GetHaproxy()
    meta_path = GetConf().GetMetaHost()

    with closing(zkHander()) as zkhander:
        value = eval(zkhander.Get(path + '/' + groupname))
        port = eval(zkhander.Get(meta_path + '/' + host.replace('.', '-')))['port']

        _add_host = host + ':' + str(port)
        _read_list = eval(value['read'])
        if _add_host in _read_list:
            print('this host already exists')
            return None

        _read_list.append(_add_host)
        value['read'] = _read_list
        print(value)
        zkhander.SetHaproxyMeta(groupname, value['read'], value['write'], type=1)

    SendRoute(groupname)
    return "OK"
def AlterHaproxy(self, groupname, delete_host, port):
    """Remove ``delete_host:port`` from the haproxy read list of ``groupname``.

    The metadata is rewritten only when the entry is present. Returns the
    result of ``SendRoute(group_name=groupname)``.
    """
    target = '{}:{}'.format(delete_host, port)
    with closing(zkHander()) as zk_conn:
        haproxy_meta = zk_conn.GetHaproxy(groupname=groupname)
        readers = eval(haproxy_meta['read'])
        if target in readers:
            # Drop the slave that went down.
            readers.remove(target)
            zk_conn.SetHaproxyMeta(group=groupname,
                                   reads=readers,
                                   master=haproxy_meta['write'],
                                   type=1)
    return SendRoute(group_name=groupname)
def __check_repl(self, groupname):
    """Return the replication entries recorded for ``groupname``.

    Returns a dict mapping each child (region) of
    ``<addition RPL path>/<groupname>`` to that child's stored value, or
    ``None`` when the node does not exist.
    """
    repl_path = GetConf().GetAdditionRPL() + '/' + groupname
    with closing(zkHander()) as zk_conn:
        if not zk_conn.Exists(repl_path):
            return None
        return {
            region: zk_conn.Get(repl_path + '/' + region)
            for region in zk_conn.GetChildren(repl_path)
        }
def Rel_Meta(self, groupname, region):
    """Store the node a sync task should replicate from.

    Writes ``str({'host': ..., 'port': ..., 'ssl': ...})`` to
    ``<addition RPL path>/<groupname>/<region>`` when ``self.host``,
    ``self.port`` and ``self.ssl`` are all truthy, otherwise ``'None'``.

    NOTE(review): a falsy ``self.ssl`` (e.g. ``0``) also yields ``'None'``;
    confirm that is intended.
    """
    if self.host and self.port and self.ssl:
        host_content = {
            'host': self.host,
            'port': self.port,
            'ssl': self.ssl
        }
    else:
        host_content = None

    with closing(zkHander()) as zk_conn:
        node_path = '/'.join([GetConf().GetAdditionRPL(), groupname, region])
        zk_conn.Create(path=node_path,
                       value=str(host_content),
                       seq=False,
                       mp=True)
def __get__region_con(self, region_value):
    """Decide which node to connect to for every region.

    Args:
        region_value: mapping of region -> stored string, either ``'None'``
            or the text of a dict such as
            ``{'host': '192.168.212.1', 'port': 3306, 'ssl': 1/0}``.

    A region keeps its stored node when that node reports online; in every
    other case an online host is selected with ``__get_online_host``.
    Returns a dict of region -> node info.
    """
    addition_master = {}  # node each region should connect to
    with closing(zkHander()) as zk_conn:
        for region, host in region_value.items():
            if host != 'None' and zk_conn.GetOnlineState(eval(host)['host']):
                addition_master[region] = eval(host)
            else:
                addition_master[region] = self.__get_online_host(region)
    return addition_master
def Init(self):
    """Start the server: initialise nodes, watches, tasks and the slave checker.

    After ``InitNode`` the active masters are looked up; if there are any
    they are watched directly. The task check then runs in either case, and
    ``SlaveCheckRun`` is started in a separate process.
    """
    with closing(zkHander()) as zk_conn:
        zk_conn.InitNode()
        master_hosts = zk_conn.GetMasterGroupHosts()

    # Watch masters that are already active; otherwise go straight to the
    # task check.
    if master_hosts is not None:
        Watch().StartWatch(master_hosts=master_hosts)
    TaskCh().TaskCheck()

    # Scan slave state for one-master/many-slave groups: every 3 seconds all
    # slaves are checked and a slave that is down is removed from routing.
    import multiprocessing
    slave_checker = multiprocessing.Process(target=SlaveCheckRun, args=())
    slave_checker.start()
def InsertClusterMeta(groupname, hosts):
    """Insert the metadata of a new cluster.

    Args:
        groupname: name of the group.
        hosts: comma-separated ``'host:port'`` entries.

    Creates the group node holding the comma-joined hosts (``'.'`` replaced
    by ``'-'``), or prints a notice when it already exists. A host node
    with ``{'group': groupname, 'port': port}`` is then created for every
    host.
    """
    _host_list = []
    _host_port = {}
    for entry in hosts.split(','):
        fields = entry.split(':')
        _host_port[fields[0]] = fields[1]
        _host_list.append(fields[0])

    group_node = GetConf().GetMetaGroup() + '/' + groupname
    host_path = GetConf().GetMetaHost()

    with closing(zkHander()) as zk_conn:
        if zk_conn.Exists(path=group_node) is not None:
            print('%s is already exists' % groupname)
        else:
            zk_conn.Create(path=group_node,
                           value=','.join(_host_list).replace('.','-'),
                           seq=False)

        for host in _host_port:
            value = {'group':groupname,'port':_host_port[host]}
            zk_conn.Create(path=host_path+'/'+host.replace('.','-'),
                           value=str(value),
                           seq=False)
def StaticInfo(self, result, host):
    """Process one slave that has been recorded as down.

    Args:
        result: mapping providing ``'port'`` and ``'groupname'``.
        host: node name of the slave; ``Replace(host)`` gives the address
            used for the MySQL connection and in log messages.

    Nothing is done (apart from a warning) when the per-host lock cannot be
    taken. If the host's node exists under ``self.online_node`` the outage
    record is deleted. Otherwise MySQL is probed three times: if it is
    reachable the record is deleted and a warning is logged, else the host
    is removed from haproxy via ``self.AlterHaproxy``.
    """
    with closing(zkHander()) as zkhander:
        # A falsy lock state is treated as "handled by another server".
        lock_state = zkhander.SetLockTask(host)
        if lock_state:
            online_state = zkhander.Exists('{}/{}'.format(
                self.online_node, host))
            if online_state is None:
                port, groupname = result['port'], result['groupname']
                # Three probes one second apart; only the last result is
                # used below.
                for i in range(0, 3):
                    with closing(dbHandle(Replace(host), port)) as dbhandle:
                        mysqlstate = dbhandle.RetryConn()  # check whether MySQL can be connected to
                    time.sleep(1)
                if mysqlstate:
                    # MySQL answers, so only the client process is offline.
                    zkhander.DeleteSlaveDown(host)
                    Logging(
                        msg=
                        'Groupname:{} slave host:{} is online,but python client server is not online!'
                        .format(groupname, Replace(host)),
                        level='warning')
                else:
                    alter_state = self.AlterHaproxy(
                        groupname=groupname,
                        delete_host=Replace(host),
                        port=port)
                    # The outage record is removed only when the haproxy
                    # change reported success; the lock is released either way.
                    if alter_state:
                        zkhander.DeleteSlaveDown(host)
                        zkhander.DeleteLockTask(host)
                    else:
                        zkhander.DeleteLockTask(host)
            else:
                # The host's online node exists: drop the outage record.
                zkhander.DeleteSlaveDown(host)
        else:
            Logging(msg='slave:{} outage task elsewhere in the execution'.
                    format(Replace(host)),
                    level='warning')
def ManageDownNode(host):
    """Read the outage record of ``host`` and hand it to ``SlaveCheck().StaticInfo``.

    The record stored at ``<slave-down path>/<host>`` is evaluated and
    passed as ``result``.
    """
    slave_down_path = GetConf().GetSlaveDown()
    with closing(zkHander()) as zk_conn:
        raw_record = zk_conn.Get(path='{}/{}'.format(slave_down_path, host))

    record = eval(raw_record)
    SlaveCheck().StaticInfo(result=record, host=host)
def __get_groupname(self):
    """Return the ``'group'`` entry of this host's metadata."""
    with closing(zkHander()) as zk_conn:
        raw_meta = zk_conn.GetMeta(name=self.host, type='host')
        return eval(raw_meta)['group']
def __up_watch_master(self, region, groupname):
    """Create the addition watch on the master recorded for ``groupname``/``region``.

    The host is passed with ``'.'`` replaced by ``'-'`` and the group name
    is forwarded as ``region_for_groupname``, matching the other addition
    watches (``ChangeRepl`` and ``__set_group_region``). Previously the
    dotted address was passed and the group name was omitted.
    """
    host, _ = self.__get_master_for_region(region, groupname)
    zkHander().CreateWatch(host=host.replace('.', '-'),
                           addition=True,
                           region=region,
                           region_for_groupname=groupname)
def SlaveDownCheck():
    """Process every slave currently recorded as down.

    Each entry of the down-slave list is passed to ``ManageDownNode``; an
    empty or missing list is a no-op.
    """
    with closing(zkHander()) as zk_conn:
        down_hosts = zk_conn.GetDownSlaveList()

    for down_host in (down_hosts or ()):
        ManageDownNode(host=down_host)
def SetWhite(self, groupname):
    """Create the whitelist node for ``groupname`` with an empty value.

    Returns whatever ``zkHander().Create`` returns.
    """
    white_node = GetConf().GetWhitePath() + '/' + groupname
    return zkHander().Create(path=white_node, value='', seq=False)