def upgrade(self, req, agent_id, body=None):
    """Spawn an async request that pushes an rpm upgrade to agents.

    Called by the client. No Idsformater decorator is needed here because
    send_asyncrequest performs the id check itself.
    """
    body = body or {}
    asyncrequest = self.create_asyncrequest(body)
    # Hook scripts travel in the async context, not in the rpc args.
    async_ctxt = {'pre_run': body.pop('pre_run', None),
                  'after_run': body.pop('after_run', None),
                  'post_run': body.pop('post_run', None)}
    target = targetutils.target_all(fanout=True)
    agents = self.agents_id_check(agent_id)
    rpc_ctxt = {} if agent_id == 'all' else {'agents': agents}
    glock = get_global().lock('agents')

    def _locked_send():
        # Hold the global agents lock while the upgrade request is dispatched.
        with glock(agents):
            self.send_asyncrequest(asyncrequest, target,
                                   rpc_ctxt, 'upgrade_agent', None, async_ctxt)

    threadpool.add_thread(safe_func_wrapper, _locked_send, LOG)
    return resultutils.results(result='Upgrade agent async request thread spawning',
                               data=[asyncrequest.to_dict()])
def delete(self, req, agent_id, body=None):
    """Delete an agent (called by the client).

    Two-phase protocol: ``delete_agent_precommit`` asks the agent to prepare,
    ``delete_agent_postcommit`` tells it to remove itself.  When ``force`` is
    true the agent is not notified at all - the record is just removed from
    the database.
    """
    body = body or {}
    force = body.get('force', False)
    rpc = get_client()
    global_data = get_global()
    metadata = None
    # delete_agent is a context manager: the DB-side delete is committed
    # when the block exits without raising.
    with global_data.delete_agent(agent_id) as agent:
        if not force:
            metadata = BaseContorller.agent_metadata(agent.agent_id)
            if metadata is None:
                raise RpcPrepareError('Can not delete offline agent, try force')
            agent_ipaddr = metadata.get('local_ip')
            secret = uuidutils.generate_uuid()
            # Phase 1: tell agent to wait for delete (synchronous call).
            finishtime, timeout = rpcfinishtime()
            delete_agent_precommit = rpc.call(targetutils.target_agent(agent),
                                              ctxt={'finishtime': finishtime},
                                              msg={'method': 'delete_agent_precommit',
                                                   'args': {'agent_id': agent.agent_id,
                                                            'agent_type': agent.agent_type,
                                                            'host': agent.host,
                                                            'agent_ipaddr': agent_ipaddr,
                                                            'secret': secret}
                                                   },
                                              timeout=timeout)
            if not delete_agent_precommit:
                raise RpcResultError('delete_agent_precommit result is None')
            if delete_agent_precommit.get('resultcode') != manager_common.RESULT_SUCCESS:
                return resultutils.results(result=delete_agent_precommit.get('result'),
                                           resultcode=manager_common.RESULT_ERROR)
            # Phase 2: tell agent to delete itself (fire-and-forget cast).
            # NOTE(review): this block uses agent_ipaddr/secret, which only
            # exist on the non-force path, so it must live inside `if not force`.
            finishtime = rpcfinishtime()[0]
            LOG.info('Delete agent %s postcommit with secret %s' % (agent_ipaddr, secret))
            rpc.cast(targetutils.target_agent(agent),
                     ctxt={'finishtime': finishtime},
                     msg={'method': 'delete_agent_postcommit',
                          'args': {'agent_id': agent.agent_id,
                                   'agent_type': agent.agent_type,
                                   'host': agent.host,
                                   'agent_ipaddr': agent_ipaddr,
                                   'secret': secret}})

        def wapper():
            # Notify all rpc servers to drop cached source data for the agent.
            rpc.cast(targetutils.target_rpcserver(fanout=True),
                     msg={'method': 'deletesource',
                          'args': {'agent_id': agent_id}})

        threadpool.add_thread(safe_func_wrapper, wapper, LOG)
        result = resultutils.results(result='Delete agent success',
                                     data=[dict(agent_id=agent.agent_id,
                                                host=agent.host,
                                                status=agent.status,
                                                metadata=metadata,
                                                ports_range=jsonutils.safe_loads_as_bytes(agent.ports_range) or [])
                                           ])
    return result
def execute(self):
    """Fire the prepared HTTP request, optionally in a background thread.

    ``self.kwargs`` carries the arguments for ``requests.request``; the
    ``'async'`` key (popped, never forwarded) selects threaded execution.
    """
    # BUGFIX: the local was previously named `async`, which is a reserved
    # keyword from Python 3.7 on (SyntaxError). The popped dict key keeps
    # its original spelling so the external contract is unchanged.
    run_async = self.kwargs.pop('async')

    def http():
        requests.request(**self.kwargs)

    if run_async:
        threadpool.add_thread(safe_func_wrapper, http, executer.LOG)
    else:
        http()
def _create_database(self, session, database, bond, **kwargs):
    """Create a local database entity and yield its (host, port).

    Generator used as a one-shot context: picks an agent (auto-selected by
    master/slave role when not given), builds the create body, creates the
    entity through entity_controller, then yields the connection info.
    """
    req = kwargs.pop('req')
    agent_id = kwargs.pop('agent_id', None)
    if not agent_id:
        # Auto-select an agent by role: slave databases ask for slave
        # capacity, otherwise master capacity.
        _kwargs = {}
        if database.slave > 0:
            _kwargs['slave'] = True
        else:
            _kwargs['master'] = True
        chioces = self._select_agents(database.dbtype, **_kwargs)
        if chioces:
            agent_id = chioces[0]
            LOG.info('Auto select database agent %d' % agent_id)
        else:
            raise InvalidArgument('Not agent found for %s' % common.DB)
    body = dict(dbtype=database.dbtype,
                auth=dict(user=database.user, passwd=database.passwd))
    configs = kwargs.pop('configs', {})
    body.update(kwargs)
    if database.slave:
        # Slave instance: needs relay log enabled.
        configs['relaylog'] = True
        body['configs'] = configs
    elif bond:
        # New master with a slave to bond: look up the slave's address and
        # ship it (plus binlog config) to the agent hosting the new master.
        _host, _port = self._get_entity(req=req, entity=int(bond.reflection_id),
                                        raise_error=True)
        configs['binlog'] = True
        # Send the slave info to the agent where the new master lives.
        _slave = {
            'bond': dict(database_id=bond.database_id, host=_host, port=_port),
            'configs': configs,
        }
        body.update(_slave)
    create_result = entity_controller.create(req=req, agent_id=agent_id,
                                             endpoint=common.DB,
                                             body=body)['data'][0]
    rpc_result = create_result.get('notify')
    entity = create_result.get('entity')
    port = rpc_result.get('port')
    host = rpc_result.get('connection')
    database.impl = 'local'
    database.status = common.UNACTIVE
    database.reflection_id = str(entity)
    # Register the allocated port asynchronously.
    threadpool.add_thread(port_controller.unsafe_create,
                          agent_id, common.DB, entity, [port, ])
    yield host, port
def overtime(self, req, request_id, body):
    """Record overtime responses for agents that never answered.

    Called by the async checker when agents do not respond.  Reserved
    interface - the same logic is already implemented in the rpc server.

    :raises InvalidArgument: if the async request is already finished.
    """
    jsonutils.schema_validate(body, OVERTIMESCHEMA)
    agent_time = body.get('agent_time')
    agents = set(body.get('agents'))
    session = get_session()
    query = model_query(session, AsyncRequest).filter_by(request_id=request_id)
    asynecrequest = query.one()
    if asynecrequest.status == manager_common.FINISH:
        raise InvalidArgument('Async request has been finished')

    def bluk():
        # Background worker: insert one overtime row per silent agent,
        # then close out the async request record.
        bulk_data = []
        for agent_id in agents:
            bulk_data.append(dict(request_id=request_id,
                                  agent_id=agent_id,
                                  agent_time=agent_time,
                                  server_time=int(time.time()),
                                  resultcode=manager_common.RESULT_OVER_FINISHTIME,
                                  result='Agent respone overtime'))
        # Responses of expirable requests go to the cache, others to the DB.
        responeutils.bluk_insert(storage=get_cache() if asynecrequest.expire else session,
                                 agents=agents,
                                 bulk_data=bulk_data,
                                 expire=asynecrequest.expire)
        if agents:
            query.update({'status': manager_common.FINISH,
                          'resultcode': manager_common.RESULT_NOT_ALL_SUCCESS,
                          'result': '%d agent not respone' % len(agents)})
        else:
            # BUGFIX: the original wrote
            #   'all agent respone result' % len(agents)
            # applying % to a string with no conversion specifier, which
            # raises TypeError whenever every agent responded in time.
            query.update({'status': manager_common.FINISH,
                          'resultcode': manager_common.RESULT_SUCCESS,
                          'result': 'all agent respone result'})
        session.flush()
        session.close()

    threadpool.add_thread(bluk)
    return resultutils.results(result='Post agent overtime success')
def status(self, req, agent_id, body=None):
    """Query live status straight from agents (not from the database).

    No Idsformater decorator is needed here because send_asyncrequest
    performs the id check itself.
    """
    body = body or {}
    body.setdefault('expire', 180)
    asyncrequest = self.create_asyncrequest(body)
    target = targetutils.target_all(fanout=True)
    rpc_ctxt = {}
    if agent_id != 'all':
        rpc_ctxt['agents'] = self.agents_id_check(agent_id)

    def _dispatch():
        self.send_asyncrequest(asyncrequest, target,
                               rpc_ctxt, 'status_agent', body)

    threadpool.add_thread(safe_func_wrapper, _dispatch, LOG)
    return resultutils.results(result='Status agent async request thread spawning',
                               data=[asyncrequest.to_dict()])
def send(self, req, agent_id, md5, body=None):
    """Async request telling agents to fetch a file identified by md5.

    Called by the client.
    """
    body = body or {}
    asyncrequest = self.create_asyncrequest(body)
    # Hook scripts travel in the async context, not in the rpc args.
    hooks = {'pre_run': body.pop('pre_run', None),
             'after_run': body.pop('after_run', None),
             'post_run': body.pop('post_run', None)}
    target = targetutils.target_all(fanout=True)
    rpc_ctxt = {}
    if agent_id != 'all':
        rpc_ctxt['agents'] = self.agents_id_check(agent_id)
    # Give the fetch one second less than the request deadline.
    rpc_args = {'md5': md5, 'timeout': asyncrequest.deadline - 1}

    def _dispatch():
        self.send_asyncrequest(asyncrequest, target,
                               rpc_ctxt, 'getfile', rpc_args, hooks)

    threadpool.add_thread(safe_func_wrapper, _dispatch, LOG)
    return resultutils.results(result='Send file to agents thread spawning',
                               data=[asyncrequest.to_dict()])
def clean(self, req, group_id, objtype, entity, body=None):
    """Permanently delete an entity (and optionally its database schemas).

    ``body['clean']`` selects the action: ``'unquote'`` (default) releases
    the database quotes, ``'delete'`` drops the schemas, ``'force'`` drops
    the schemas ignoring failures.  The entity must already be marked
    DELETED and its agent must be online.
    """
    body = body or {}
    action = body.pop('clean', 'unquote')
    force = False
    ignores = body.pop('ignores', [])
    if action not in ('delete', 'unquote', 'force'):
        raise InvalidArgument('clean option value error')
    if action == 'force':
        action = 'delete'
        force = True
    group_id = int(group_id)
    entity = int(entity)
    session = endpoint_session()
    glock = get_gamelock()
    metadata, ports = self._entityinfo(req=req, entity=entity)
    if not metadata:
        raise InvalidArgument('Agent offline, can not delete entity')
    query = model_query(session, AppEntity, filter=AppEntity.entity == entity)
    query = query.options(joinedload(AppEntity.databases, innerjoin=False))
    _entity = query.one()
    rollbacks = []

    def _rollback():
        # Re-bond every quote released before the failure.
        for back in rollbacks:
            __database_id = back.get('database_id')
            __schema = back.get('schema')
            __quote_id = back.get('quote_id')
            rbody = dict(quote_id=__quote_id, entity=entity)
            rbody.setdefault(dbcommon.ENDPOINTKEY, common.NAME)
            try:
                schema_controller.bond(req, database_id=__database_id,
                                       schema=__schema, body=rbody)
            except Exception:
                # BUGFIX: previously logged the outer-scope loop variable
                # `schema`, which is stale (or unbound) by the time this
                # rollback thread runs; log the rollback entry's own schema.
                LOG.error('rollback entity %d quote %d.%s.%d fail'
                          % (entity, __database_id, __schema, __quote_id))

    with glock.grouplock(group=group_id):
        target = targetutils.target_agent_by_string(metadata.get('agent_type'),
                                                    metadata.get('host'))
        target.namespace = common.NAME
        rpc = get_client()
        finishtime, timeout = rpcfinishtime()
        LOG.warning('Clean entity %s.%d with action %s' % (objtype, entity, action))
        with session.begin():
            # Ask the agent to confirm the entity process is stopped.
            rpc_ret = rpc.call(target, ctxt={'finishtime': finishtime},
                               msg={'method': 'stoped',
                                    'args': dict(entity=entity)})
            if not rpc_ret:
                raise RpcResultError('check entity is stoped result is None')
            if rpc_ret.get('resultcode') != manager_common.RESULT_SUCCESS:
                raise RpcResultError('check entity is stoped fail, running')
        with session.begin():
            if _entity.status != common.DELETED:
                raise InvalidArgument('Entity status is not DELETED, '
                                      'mark status to DELETED before delete it')
            if _entity.objtype != objtype:
                raise InvalidArgument('Objtype not match')
            if _entity.group_id != group_id:
                raise InvalidArgument('Group id not match')
            # Ensure it is safe to drop each schema before deleting anything.
            if action == 'delete':
                LOG.warning('Clean option is delete, can not rollback when fail')
                if not force:
                    for _database in _entity.databases:
                        schema = '%s_%s_%s_%d' % (common.NAME, objtype,
                                                  _database.subtype, entity)
                        schema_info = schema_controller.show(req=req,
                                                             database_id=_database.database_id,
                                                             schema=schema,
                                                             body={'quotes': True})['data'][0]
                        quotes = {}
                        for _quote in schema_info['quotes']:
                            quotes[_quote.get('quote_id')] = _quote.get('desc')
                        if _database.quote_id not in quotes.keys():
                            # if set(quotes) != set([_database.quote_id]):
                            result = 'delete %s:%d fail' % (objtype, entity)
                            reason = ': database [%d].%s quote: %s' % (_database.database_id,
                                                                       schema, str(quotes))
                            return resultutils.results(result=(result + reason),
                                                       resultcode=manager_common.RESULT_ERROR)
                        quotes.pop(_database.quote_id)
                        # Quotes whose description is in `ignores` may be dropped.
                        for quote_id in quotes.keys():
                            if quotes[quote_id] in ignores:
                                quotes.pop(quote_id, None)
                        if quotes:
                            if LOG.isEnabledFor(logging.DEBUG):
                                LOG.debug('quotes not match for %d: %s'
                                          % (schema_info['schema_id'], schema))
                                for quote_id in quotes.keys():
                                    LOG.debug('quote %d: %s exist' % (quote_id, quotes[quote_id]))
                                LOG.debug('Can not delete schema before delete quotes')
                            return resultutils.results(result='Quotes not match',
                                                       resultcode=manager_common.RESULT_ERROR)
                        LOG.info('Databae quotes check success for %s' % schema)
            # Clean each bound database (drop schema or release quote).
            for _database in _entity.databases:
                schema = '%s_%s_%s_%d' % (common.NAME, objtype,
                                          _database.subtype, entity)
                if action == 'delete':
                    LOG.warning('Delete schema %s from %d' % (schema, _database.database_id))
                    try:
                        schema_controller.delete(req=req, database_id=_database.database_id,
                                                 schema=schema,
                                                 body={'unquotes': [_database.quote_id],
                                                       'ignores': ignores,
                                                       'force': force})
                    except GopdbError as e:
                        LOG.error('Delete schema:%s from %d fail, %s'
                                  % (schema, _database.database_id, e.message))
                        if not force:
                            raise e
                    except Exception:
                        LOG.exception('Delete schema:%s from %d fail'
                                      % (schema, _database.database_id))
                        if not force:
                            raise
                elif action == 'unquote':
                    LOG.info('Try unquote %d' % _database.quote_id)
                    try:
                        quote = schema_controller.unquote(req=req,
                                                          quote_id=_database.quote_id)['data'][0]
                        if quote.get('database_id') != _database.database_id:
                            LOG.critical('quote %d with database %d, not %d'
                                         % (_database.quote_id, quote.get('database_id'),
                                            _database.database_id))
                            raise RuntimeError('Data error, quote database not the same')
                        rollbacks.append(dict(database_id=_database.database_id,
                                              quote_id=_database.quote_id,
                                              schema=schema))
                    except Exception as e:
                        LOG.error('Unquote %d fail, try rollback' % _database.quote_id)
                        if not force:
                            threadpool.add_thread(_rollback)
                        raise e
            token = uuidutils.generate_uuid()
            LOG.info('Send delete command with token %s' % token)
            session.delete(_entity)
            session.flush()
            try:
                entity_controller.delete(req, common.NAME, entity=entity,
                                         body=dict(token=token))
            except Exception as e:
                # Roll back the released quotes on failure.
                threadpool.add_thread(_rollback)
                raise e
    return resultutils.results(result='delete %s:%d success' % (objtype, entity),
                               data=[dict(entity=entity, objtype=objtype,
                                          ports=ports, metadata=metadata)])
def _async_bluck_rpc(self, action, group_id, objtype, entity, body=None, context=None):
    """Dispatch an async bulk rpc (`<action>_entitys`) to a group's entities.

    ``entity`` may be 'all' or an id expression understood by
    argutils.map_to_int.  Only entities in a dispatchable status are
    targeted; their owning agents form the rpc context.
    """
    # NOTE(review): stack()[0][3] is this function's own name, not the
    # public caller's - presumably [1] was intended; verify before changing.
    caller = inspect.stack()[0][3]
    body = body or {}
    group_id = int(group_id)
    context = context or empty_context
    if entity == 'all':
        entitys = 'all'
    else:
        entitys = argutils.map_to_int(entity)
    asyncrequest = self.create_asyncrequest(body)
    target = targetutils.target_endpoint(common.NAME)
    session = endpoint_session(readonly=True)
    query = model_query(session, AppEntity,
                        filter=and_(AppEntity.group_id == group_id,
                                    AppEntity.objtype == objtype))
    # Map entity id -> owning agent id for every dispatchable entity.
    emaps = dict()
    for _entity in query:
        if _entity.status <= common.DELETED:
            continue
        # Non-OK entities are only eligible for the 'stop' action.
        if _entity.status != common.OK and action != 'stop':
            continue
        emaps.setdefault(_entity.entity, _entity.agent_id)
    if entitys == 'all':
        entitys = emaps.keys()
        agents = set(emaps.values())
    else:
        if entitys - set(emaps.keys()):
            raise InvalidArgument('Some entitys not found or status is not active')
        agents = set()
        for entity in emaps:
            if entity in entitys:
                agents.add(emaps[entity])
    # `context` may wrap the dispatch (e.g. for merge bookkeeping).
    with context(asyncrequest.request_id, entitys, agents):
        async_ctxt = dict(pre_run=body.pop('pre_run', None),
                          after_run=body.pop('after_run', None),
                          post_run=body.pop('post_run', None))
        rpc_ctxt = {}
        rpc_ctxt.setdefault('agents', agents)
        rpc_method = '%s_entitys' % action
        rpc_args = dict(entitys=list(entitys))
        rpc_args.update(body)

        def wapper():
            self.send_asyncrequest(asyncrequest, target,
                                   rpc_ctxt, rpc_method, rpc_args, async_ctxt)

        threadpool.add_thread(safe_func_wrapper, wapper, LOG)
    return resultutils.results(result='gogamechen2 %s entitys %s spawning' % (objtype, caller),
                               data=[asyncrequest.to_dict()])
def report(self, req, agent_id, body=None): body = body or {} # agent元数据 metadata = body.pop('metadata') # 元数据缓存时间 expire = body.pop('expire') # 性能快照 snapshot = body.pop('snapshot', None) # 有元数据传入,更新缓存中元数据 if metadata: # 随机延迟最长时间是15秒,所以expire时间增加15秒 eventlet.spawn_n(BaseContorller._agent_metadata_flush, agent_id, metadata, expire + manager_common.ONLINE_EXIST_EXPAND) # 没有元数据,延长缓存中的元数据持续时间 else: # 随机延迟0~RANDOMDELAYMAX秒 # 避免所有agent在同一时间调用redis延长key delay = random.randint(0, min(RANDOMDELAYMAX, expire/10)) # 计算补偿时间 fix = manager_common.ONLINE_EXIST_EXPAND - delay def _expire(): try: BaseContorller._agent_metadata_expire(agent_id, expire + fix) except AgentMetadataMiss: # 元数据丢失, 通知agent重新上报 rpc = get_client() # send to rpc server rpc.cast(targetutils.target_all(fanout=True), ctxt={'agents': [agent_id, ]}, msg={'method': 'flush_metadata', 'args': {'expire': expire}}) eventlet.spawn_after(delay, _expire) if snapshot: snapshot.setdefault('agent_id', agent_id) def wapper(): eventlet.sleep(random.randint(0, 5)) # save report log session = get_session() report = AgentReportLog(**snapshot) session.add(report) session.flush() session.close() process = snapshot.get('running') + snapshot.get('sleeping') free = snapshot.get('free') + snapshot.get('cached') conns = snapshot.get('syn') + snapshot.get('enable') cputime = snapshot.get('iowait') + snapshot.get('user') \ + snapshot.get('system') + snapshot.get('nice')\ + snapshot.get('irq') + snapshot.get('sirq') rpc = get_client() # send to rpc server rpc.cast(targetutils.target_rpcserver(fanout=True), ctxt = {}, msg={'method': 'changesource', 'args': {'agent_id': agent_id, 'free': free, 'process': process, 'cputime': cputime, 'iowait': snapshot.get('iowait'), 'left': snapshot.get('left'), 'fds': snapshot.get('num_fds'), 'conns': conns, 'metadata': metadata, }}) threadpool.add_thread(safe_func_wrapper, wapper, LOG) return resultutils.results(result='report success')
def delete(self, req, group_id, objtype, entity, body=None):
    """Mark an entity deleted (soft delete).

    Verifies the entity is stopped on its agent, flips its status to
    DELETED, and for gameservers also removes its area and resource-version
    quotes.  Idempotent: an already-DELETED entity returns success.
    """
    body = body or {}
    force = body.get('force', False)
    group_id = int(group_id)
    entity = int(entity)
    session = endpoint_session()
    glock = get_gamelock()
    metadata, ports = self._entityinfo(req=req, entity=entity)
    if not metadata:
        raise InvalidArgument('Agent offline, can not delete entity')
    query = model_query(session, AppEntity, filter=AppEntity.entity == entity)
    if objtype == common.GAMESERVER:
        query = query.options(joinedload(AppEntity.areas, innerjoin=False))
    _entity = query.one()
    if _entity.status == common.DELETED:
        # Already marked - nothing to do.
        return resultutils.results(result='mark %s entity delete success' % objtype,
                                   data=[dict(entity=entity, objtype=objtype,
                                              ports=ports, metadata=metadata)
                                         ])
    if _entity.objtype != objtype:
        raise InvalidArgument('Objtype not match')
    if _entity.group_id != group_id:
        raise InvalidArgument('Group id not match')
    target = targetutils.target_agent_by_string(metadata.get('agent_type'),
                                                metadata.get('host'))
    target.namespace = common.NAME
    rpc = get_client()
    with glock.grouplock(group=group_id):
        if objtype == common.GMSERVER:
            # GM goes last: everything else in the group must go first.
            if model_count_with_key(session, AppEntity,
                                    filter=AppEntity.group_id == group_id) > 1:
                raise InvalidArgument('You must delete other objtype entity before delete gm')
            if model_count_with_key(session, Package,
                                    filter=Package.group_id == group_id) > 1:
                raise InvalidArgument('You must delete other Package before delete gm')
        elif objtype == common.CROSSSERVER:
            # A cross server still referenced by gameservers cannot go.
            if model_count_with_key(session, AppEntity,
                                    filter=AppEntity.cross_id == _entity.entity):
                raise InvalidArgument('Cross server are reflected')
        with session.begin():
            # Confirm with the agent that the entity process is stopped.
            rpc_ret = rpc.call(target,
                               ctxt={'agents': [_entity.agent_id, ]},
                               msg={'method': 'stoped',
                                    'args': dict(entity=entity)})
            if not rpc_ret:
                raise RpcResultError('check entity stoped result is None')
            if rpc_ret.get('resultcode') != manager_common.RESULT_SUCCESS:
                raise RpcResultError('check entity fail %s' % rpc_ret.get('result'))
            _entity.status = common.DELETED
            session.flush()
            if objtype == common.GAMESERVER:
                # Drop all resource-version quote references.
                if _entity.versions:
                    for quote in six.itervalues(jsonutils.loads_as_bytes(_entity.versions)):
                        threadpool.add_thread(cdnquote_controller.delete,
                                              req, quote.get('quote_id'))
                    _entity.versions = None
                    session.flush()
                if _entity.areas:
                    if len(_entity.areas) > 1:
                        raise InvalidArgument('%s areas more then one' % objtype)
                    area = _entity.areas[0]
                    if not force:
                        # Only the highest-numbered gameserver of the group
                        # may be deleted (unless forced).
                        if _entity.entity != model_max_with_key(
                                session, AppEntity.entity,
                                filter=and_(AppEntity.objtype == common.GAMESERVER,
                                            AppEntity.group_id == group_id)):
                            raise InvalidArgument(
                                'entity %d is not the last gamesvr entity in group' % entity)
                    session.flush()
                    session.delete(area)
                    session.flush()
                    # Remove the area from every channel package it was in.
                    _query = model_query(session, PackageArea,
                                         filter=PackageArea.area_id == area.area_id)
                    _query.delete()
                    session.flush()
            # Tell the agent about the new status (fire-and-forget).
            rpc.cast(target,
                     ctxt={'agents': [_entity.agent_id, ]},
                     msg={'method': 'change_status',
                          'args': dict(entity=entity, status=common.DELETED)})
    return resultutils.results(result='mark %s entity delete success' % objtype,
                               data=[dict(entity=entity, objtype=objtype,
                                          ports=ports, metadata=metadata)
                                     ])
def create(self, req, group_id, objtype, body=None):
    """Create an application entity (GM / cross / game server) in a group.

    Selects an agent and databases, resolves chief servers (GM + cross) for
    gameservers, asks entity_controller to create the entity on the agent,
    then records the entity (and its game area) in the database.
    """
    body = body or {}
    group_id = int(group_id)
    jsonutils.schema_validate(body, self.CREATEAPPENTITY)
    if body.get('packages'):
        raise InvalidArgument('Package parameter is removed')
    # Cross server to bind to - gameserver only.
    cross_id = body.pop('cross_id', None)
    # Opening time - gameserver only.
    opentime = body.pop('opentime', None)
    # Displayed area id - gameserver only.
    show_id = body.pop('show_id', None)
    # Displayed area name - gameserver only.
    areaname = body.pop('areaname', None)
    # Platform type.
    platform = body.get('platform')
    include = set(body.pop('include', []))
    exclude = set(body.pop('exclude', []))
    if include and exclude:
        raise InvalidArgument('Both packages and exclude is forbidden')
    packages = []
    session = endpoint_session()
    if objtype == common.GAMESERVER:
        platform = common.PlatformTypeMap.get(platform)
        if not areaname or not opentime or not platform or not show_id:
            raise InvalidArgument('%s need opentime and areaname and platform and show_id' % objtype)
    # Install file information.
    appfile = body.pop(common.APPFILE)
    LOG.debug('Try find agent and database for entity')
    # Choose the agent that will run the instance.
    agent_id = body.get('agent_id') or self._agentselect(req, objtype, **body)
    # Choose the databases for the instance.
    databases = self._dbselect(req, objtype, **body)
    # Validate the database selection.
    if not self._validate_databases(objtype, databases):
        raise InvalidArgument('Miss some database')
    LOG.info('Find agent and database for entity success, to agent %d, to databse %s'
             % (agent_id, str(databases)))
    query = model_query(session, Group, filter=Group.group_id == group_id)
    joins = joinedload(Group.entitys, innerjoin=False)
    joins = joins.joinedload(AppEntity.databases, innerjoin=False)
    query = query.options(joins)
    _group = query.one()
    glock = get_gamelock()
    with glock.grouplock(group_id):
        if objtype == common.GAMESERVER:
            # Resolve the channel packages for this platform.
            _pquery = model_query(session, Package,
                                  filter=Package.group_id == group_id)
            _packages = set([p.package_id for p in _pquery.all()
                             if p.platform & platform])
            if (include - _packages) or (exclude - _packages):
                raise InvalidArgument('Package can not be found in include or exclude')
            if exclude:
                packages = _packages - exclude
            elif include:
                packages = include
            else:
                packages = _packages
        typemap = {}
        for _entity in _group.entitys:
            # Skip inactive entities.
            if _entity.status != common.OK:
                continue
            try:
                typemap[_entity.objtype].append(_entity)
            except KeyError:
                typemap[_entity.objtype] = [_entity, ]
        # Chief servers (GM + cross) for the new entity.
        chiefs = None
        # Active entities of the same type.
        same_type_entitys = typemap.get(objtype, [])
        if objtype == common.GMSERVER:
            # A group may hold exactly one GM server; any existing one must
            # be cleaned first.
            for _entity in _group.entitys:
                if _entity.objtype == common.GMSERVER:
                    return resultutils.results(
                        result='create entity fail, %s duplicate in group' % objtype,
                        resultcode=manager_common.RESULT_ERROR)
        else:
            # Non-GM entities need the group's GM server first.
            try:
                gm = typemap[common.GMSERVER][0]
                if gm.status <= common.DELETED:
                    return resultutils.results(
                        result='Create entity fail, gm mark deleted',
                        resultcode=manager_common.RESULT_ERROR)
            except KeyError as e:
                return resultutils.results(
                    result='Create entity fail, can not find GMSERVER: %s' % e.message,
                    resultcode=manager_common.RESULT_ERROR)
            if objtype == common.GAMESERVER:
                if model_count_with_key(session, GameArea,
                                        filter=and_(GameArea.group_id == group_id,
                                                    GameArea.areaname == areaname)):
                    return resultutils.results(
                        result='Create entity fail, name exist',
                        resultcode=manager_common.RESULT_ERROR)
                cross = None
                # A gameserver must bind a cross server of the same group.
                try:
                    crossservers = typemap[common.CROSSSERVER]
                except KeyError as e:
                    return resultutils.results(
                        result='create entity fail, can not find my chief: %s' % e.message,
                        resultcode=manager_common.RESULT_ERROR)
                if cross_id:
                    # A specific cross instance was requested - it must be
                    # in this group.
                    for _cross in crossservers:
                        if cross_id == _cross.entity:
                            cross = _cross
                            break
                else:
                    if not same_type_entitys:
                        # First gameserver of the group: take the first cross.
                        cross = crossservers[0]
                    else:
                        # Count how many gameservers reference each cross and
                        # pick the least-referenced one.
                        # NOTE(review): `counted` is never added to, so the
                        # `in counted` skip below never fires - confirm intent.
                        counted = set()
                        counter = dict()
                        for _cross in crossservers:
                            counter.setdefault(_cross.entity, 0)
                        for _entity in _group.entitys:
                            if _entity.objtype != common.GAMESERVER:
                                continue
                            if _entity.cross_id in counted:
                                continue
                            counter[_entity.cross_id] += 1
                        # Least-referenced cross_id wins.
                        cross_id = sorted(zip(counter.itervalues(),
                                              counter.iterkeys()))[0][1]
                        for _cross in crossservers:
                            if cross_id == _cross.entity:
                                cross = _cross
                                break
                if not cross:
                    raise InvalidArgument('cross server can not be found or not active')
                # Fetch server info (ports/ip) for the chief entities.
                maps = entity_controller.shows(endpoint=common.NAME,
                                               entitys=[gm.entity, cross.entity])
                for v in six.itervalues(maps):
                    if v is None:
                        raise InvalidArgument('Get chiefs info error, agent not online?')
                chiefs = dict()
                # Cross and GM server connection info.
                for chief in (cross, gm):
                    chiefmetadata = maps.get(chief.entity).get('metadata')
                    ports = maps.get(chief.entity).get('ports')
                    if not chiefmetadata:
                        raise InvalidArgument('%s.%d is offline'
                                              % (chief.objtype, chief.entity))
                    need = common.POSTS_COUNT[chief.objtype]
                    if need and len(ports) != need:
                        raise InvalidArgument('%s.%d port count error, '
                                              'find %d, need %d' % (chief.objtype,
                                                                    chief.entity,
                                                                    len(ports), need))
                    chiefs.setdefault(chief.objtype,
                                      dict(entity=chief.entity,
                                           ports=ports,
                                           local_ip=chiefmetadata.get('local_ip')))
                cross_id = cross.entity
        # Full rpc payload for the agent-side create.
        create_body = dict(objtype=objtype,
                           appfile=appfile,
                           databases=databases,
                           chiefs=chiefs,
                           entity=int(body.get('entity', 0)))
        with session.begin():
            body.setdefault('finishtime', rpcfinishtime()[0] + 5)
            try:
                create_result = entity_controller.create(req=req,
                                                         agent_id=agent_id,
                                                         endpoint=common.NAME,
                                                         body=create_body)['data'][0]
            except RpcResultError as e:
                LOG.error('Create entity rpc call fail: %s' % e.message)
                raise InvalidArgument(e.message)
            entity = create_result.get('entity')
            rpc_result = create_result.get('notify')
            LOG.info('Create new entity %d' % entity)
            LOG.debug('Entity controller create rpc result %s', str(rpc_result))
            # Persist the entity record.
            appentity = AppEntity(entity=entity,
                                  agent_id=agent_id,
                                  group_id=group_id,
                                  objtype=objtype,
                                  cross_id=cross_id,
                                  opentime=opentime,
                                  platform=platform)
            session.add(appentity)
            session.flush()
            if objtype == common.GAMESERVER:
                areaname = areaname.decode('utf-8') if isinstance(areaname, six.binary_type) else areaname
                gamearea = GameArea(group_id=_group.group_id,
                                    show_id=show_id,
                                    areaname=areaname,
                                    gid=None,
                                    entity=appentity.entity)
                session.add(gamearea)
                session.flush()
                # Register the area with every selected channel package.
                if packages:
                    for package_id in packages:
                        session.add(PackageArea(package_id=package_id,
                                                area_id=gamearea.area_id))
                        session.flush()
            # Persist database bindings reported by the agent.
            if rpc_result.get('databases'):
                self._bondto(session, entity, rpc_result.get('databases'))
            else:
                LOG.error('New entity database miss')
        _result = dict(entity=entity,
                       objtype=objtype,
                       agent_id=agent_id,
                       connection=rpc_result.get('connection'),
                       ports=rpc_result.get('ports'),
                       databases=rpc_result.get('databases'))
        areas = []
        if objtype == common.GAMESERVER:
            areas = [dict(area_id=gamearea.area_id,
                          gid=0,
                          areaname=areaname,
                          show_id=show_id)]
            _result.setdefault('areas', areas)
            _result.setdefault('cross_id', cross_id)
            _result.setdefault('opentime', opentime)
            _result.setdefault('platform', platform)
            _result.setdefault('packages', sorted(packages))
        # Register the allocated ports.
        # threadpool.add_thread(port_controller.unsafe_create,
        #                       agent_id, common.NAME, entity, rpc_result.get('ports'))
        port_controller.unsafe_create(agent_id, common.NAME, entity,
                                      rpc_result.get('ports'))
        # Agent follow-up notification.
        threadpool.add_thread(entity_controller.post_create_entity,
                              entity, common.NAME,
                              objtype=objtype,
                              status=common.UNACTIVE,
                              opentime=opentime,
                              group_id=group_id,
                              areas=areas)
    return resultutils.results(result='create %s entity success' % objtype,
                               data=[_result, ])
def merge(self, req, body=None):
    """Merge several gameserver entities into one new entity.

    Server-merge endpoint; partly duplicates `create` (not yet unified).
    Validates the source entities, resolves the GM/cross chiefs, asks the
    agent to build the merged entity, and records the merge task.
    """
    body = body or {}
    jsonutils.schema_validate(body, self.MERGEAPPENTITYS)
    group_id = body.pop('group_id')
    # Entities to merge (deduplicated, sorted).
    entitys = list(set(body.pop('entitys')))
    entitys.sort()
    session = endpoint_session()
    # Install file information.
    appfile = body.pop(common.APPFILE)
    # Agent that will run the merged instance.
    agent_id = body.get('agent_id') or self._agentselect(req, common.GAMESERVER, **body)
    # Databases for the merged instance.
    databases = self._dbselect(req, common.GAMESERVER, **body)
    opentime = body.get('opentime')
    # Merge task id.
    uuid = uuidutils.generate_uuid()
    # Query the group's GM and cross servers for the chiefs payload.
    query = model_query(session, AppEntity,
                        filter=and_(AppEntity.group_id == group_id,
                                    AppEntity.objtype.in_([common.GMSERVER,
                                                           common.CROSSSERVER])))
    gm = None
    cross = None
    crosss = []
    # Merged platform bitmask, accumulated from the source entities.
    platform = None
    glock = get_gamelock()
    with glock.grouplock(group_id):
        if model_count_with_key(session, MergeEntity,
                                filter=MergeEntity.entity.in_(entitys)):
            raise InvalidArgument('Target entity merged or in mergeing')
        for appentity in query:
            if appentity.status != common.OK:
                continue
            if appentity.objtype == common.GMSERVER:
                gm = appentity
            else:
                crosss.append(appentity)
        if not gm:
            raise InvalidArgument('Group not exist or gm not active/exist?')
        if not crosss:
            raise InvalidArgument('Group has no cross server?')
        if not body.get('cross_id'):
            cross = crosss[0]
        else:
            for appentity in crosss:
                if appentity.entity == body.get('cross_id'):
                    cross = appentity
                    break
        if not cross:
            raise InvalidArgument('cross server can not be found?')
        # Fetch server info (ports/ip) for the chief entities.
        maps = entity_controller.shows(endpoint=common.NAME,
                                       entitys=[gm.entity, cross.entity])
        chiefs = dict()
        # Cross and GM server connection info.
        for chief in (cross, gm):
            chiefmetadata = maps.get(chief.entity).get('metadata')
            ports = maps.get(chief.entity).get('ports')
            if not chiefmetadata:
                raise InvalidArgument('%s.%d is offline'
                                      % (chief.objtype, chief.entity))
            need = common.POSTS_COUNT[chief.objtype]
            if need and len(ports) != need:
                raise InvalidArgument('%s.%d port count error, '
                                      'find %d, need %d' % (chief.objtype,
                                                            chief.entity,
                                                            len(ports), need))
            chiefs.setdefault(chief.objtype,
                              dict(entity=chief.entity,
                                   ports=ports,
                                   local_ip=chiefmetadata.get('local_ip')))
        # Load and validate the entities being merged.
        appentitys = []
        query = model_query(session, AppEntity,
                            filter=and_(AppEntity.group_id == group_id,
                                        AppEntity.entity.in_(entitys)))
        query = query.options(joinedload(AppEntity.areas, innerjoin=False))
        with session.begin():
            for appentity in query:
                if appentity.objtype != common.GAMESERVER:
                    raise InvalidArgument('Target entity %d is not %s'
                                          % (appentity.entity, common.GAMESERVER))
                if appentity.status != common.UNACTIVE:
                    raise InvalidArgument('Target entity %d is not unactive'
                                          % appentity.entity)
                if not appentity.areas:
                    raise InvalidArgument('Target entity %d has no area?'
                                          % appentity.entity)
                if appentity.versions:
                    raise InvalidArgument('Traget entity %d version is not None'
                                          % appentity.entity)
                if platform is None:
                    platform = appentity.platform
                else:
                    # Platforms differ between areas - OR the bitmasks.
                    platform = platform | appentity.platform
                appentitys.append(appentity)
                if not opentime:
                    opentime = appentity.opentime
            if len(appentitys) != len(entitys):
                raise InvalidArgument('Can not match entitys count')
            # Full rpc payload: send the merge command to the agent.
            body = dict(appfile=appfile,
                        databases=databases,
                        opentime=opentime,
                        chiefs=chiefs,
                        uuid=uuid,
                        entitys=entitys)
            body.setdefault('finishtime', rpcfinishtime()[0] + 5)
            try:
                create_result = entity_controller.create(req=req,
                                                         agent_id=agent_id,
                                                         endpoint=common.NAME,
                                                         body=body,
                                                         action='merge')['data'][0]
            except RpcResultError as e:
                LOG.error('Create entity rpc call fail: %s' % e.message)
                raise InvalidArgument(e.message)
            mergetd_entity = create_result.get('entity')
            rpc_result = create_result.get('notify')
            LOG.info('Merge to entity %d, agent %d' % (mergetd_entity, agent_id))
            LOG.debug('Entity controller merge rpc result %s' % str(rpc_result))
            # Persist the merged entity record.
            appentity = AppEntity(entity=mergetd_entity,
                                  agent_id=agent_id,
                                  group_id=group_id,
                                  objtype=common.GAMESERVER,
                                  cross_id=cross.entity,
                                  opentime=opentime,
                                  platform=platform)
            session.add(appentity)
            session.flush()
            # Persist database bindings reported by the agent.
            if rpc_result.get('databases'):
                self._bondto(session, mergetd_entity, rpc_result.get('databases'))
            else:
                LOG.error('New entity database miss')
            # Record the merge task.
            mtask = MergeTask(uuid=uuid, entity=mergetd_entity,
                              mergetime=int(time.time()))
            session.add(mtask)
            session.flush()
            for _appentity in appentitys:
                session.add(MergeEntity(entity=_appentity.entity, uuid=uuid))
                session.flush()
            # Bulk-flip the merged source entities to MERGEING.
            query.update({'status': common.MERGEING}, synchronize_session=False)
            session.flush()
        # Register the allocated ports.
        port_controller.unsafe_create(agent_id, common.NAME, mergetd_entity,
                                      rpc_result.get('ports'))
        # Agent follow-up notification.
        threadpool.add_thread(entity_controller.post_create_entity,
                              appentity.entity, common.NAME,
                              objtype=common.GAMESERVER,
                              status=common.UNACTIVE,
                              opentime=opentime,
                              group_id=group_id,
                              areas=[])
        # threadpool.add_thread(port_controller.unsafe_create,
        #                       agent_id, common.NAME, mergetd_entity, rpc_result.get('ports'))
    return resultutils.results(result='entitys is mergeing',
                               data=[dict(uuid=uuid, entitys=entitys,
                                          entity=mergetd_entity)])
def create_entity(self, entity, timeout, **kwargs):
    """Create a database entity: write its config file and start the install.

    Allocates a listen port, prepares the entity's home/log paths, saves the
    database configuration file, then hands ``dbmanager.install`` off to a
    worker thread so the (slow) install runs asynchronously.

    @param entity: entity id used to derive paths, users and ports
    @param timeout: install timeout, passed through to the install thread
    @param dbtype: string, database type (implementation key for utils.impl_cls)
    @param configs: dict, database configuration options
    @param auth: dict, remote administrator account/password (left in kwargs
                 and forwarded to ``dbmanager.install`` — confirm with impl)
    @param bond: dict, slave database to bind (master-side-only parameter)
    @return: int, the port allocated for this entity
    """
    dbtype = kwargs.pop('dbtype')
    configs = kwargs.pop('configs', {})
    bond = kwargs.pop('bond', None)
    if bond:
        # Master side: prepare replication privileges for the slave that
        # will be bonded once the instance is up (used in _notify_success).
        replication = privilegeutils.mysql_replprivileges(
            bond.get('database_id'), bond.get('host'))
        kwargs['replication'] = replication
    port = configs.pop('port', None)
    # Runtime file locations are derived from the entity home and log paths.
    pidfile = os.path.join(self.entity_home(entity), '%s.pid' % dbtype)
    sockfile = os.path.join(self.entity_home(entity), '%s.sock' % dbtype)
    logfile = os.path.join(self.logpath(entity), '%s.log' % dbtype)
    install_log = os.path.join(self.logpath(entity), 'install.log')
    cfgfile = self._db_conf(entity, dbtype)
    LOG.info('Load database manager for %s' % dbtype)
    dbmanager = utils.impl_cls('rpc', dbtype)
    with self._prepare_entity_path(entity, apppath=False):
        # port may still be None here; _allocate_port then chooses a free one.
        with self._allocate_port(entity, port) as ports:
            port = ports[0]
            # Fill defaults only; caller-supplied config values win.
            configs.setdefault('entity', entity)
            configs.setdefault('port', port)
            configs.setdefault('datadir', self.apppath(entity))
            configs.setdefault('pidfile', pidfile)
            configs.setdefault('sockfile', sockfile)
            configs.setdefault('logfile', logfile)
            configs.setdefault('runuser', self.entity_user(entity))
            dbmanager.save_conf(cfgfile, **configs)
            LOG.info('Prepare database config file success')

            def _notify_success(binlog):
                """Post-install callback: report the created database instance.

                ``binlog`` is read for the master's 'File'/'Position' keys
                when a slave has to be bonded.
                """
                self.manager.change_performance()
                dbinfo = self.konwn_database.get(entity)
                if not dbinfo:
                    LOG.warning(
                        'Can not find entity database id, active fail')
                    return
                if bond:
                    LOG.debug('Try bond slave database')
                    self.client.database_bond(
                        database_id=bond.get('database_id'),
                        body={
                            'master': dbinfo.get('database_id'),
                            'host': self.manager.local_ip,
                            'port': port,
                            'passwd': replication.get('passwd'),
                            'file': binlog.get('File'),
                            'position': binlog.get('Position'),
                            'schemas': [],
                        })
                # Only flag the database OK if the entity process is running.
                if self._entity_process(entity):
                    self.client.database_update(
                        database_id=dbinfo.get('database_id'),
                        body={'status': common.OK})

            kwargs.update({'logfile': install_log})
            # Run the install in a worker thread; _notify_success fires when
            # the install reports success.
            threadpool.add_thread(dbmanager.install, cfgfile,
                                  _notify_success, timeout, **kwargs)
            # def _port_notify():
            #     """notify port bond"""
            #     _timeout = timeout if timeout else 30
            #     overtime = int(time.time()) + _timeout
            #     while entity not in self.konwn_database:
            #         if int(time.time()) > overtime:
            #             LOG.error('Fail allocate port %d for %s.%d' % (ports[0], common.DB, entity))
            #             return
            #         eventlet.sleep(1)
            #     self.client.ports_add(agent_id=self.manager.agent_id,
            #                           endpoint=common.DB, entity=entity, ports=ports)
            # threadpool.add_thread(_port_notify)
            return port
def rpc_asyncrequest(self, ctxt, asyncrequest,
                     rpc_target, rpc_method, rpc_ctxt, rpc_args):
    """Persist an async request, cast it over rpc and watch for responses.

    Builds an AsyncRequest row, runs optional compiled pre/after/post hooks,
    casts ``rpc_method`` to the target agents and finally spawns a thread
    that marks agents which never responded as overtime.

    @param ctxt: dict, may carry 'finishtime' and the optional 'pre_run',
                 'after_run', 'post_run' hook descriptions
    @param asyncrequest: dict, constructor kwargs for AsyncRequest
    @param rpc_target: dict, constructor kwargs for the rpc Target
    @param rpc_method: string, remote method name to cast
    @param rpc_ctxt: dict, rpc context; an 'agents' list limits which agents
                     are waited on (otherwise all non-deleted agents)
    @param rpc_args: dict, arguments of the remote method
    """
    session = get_session()
    finishtime = ctxt.get('finishtime', None)
    asyncrequest = AsyncRequest(**asyncrequest)
    # Optional server-side hooks; compiled into executers below.
    pre_run = ctxt.pop('pre_run', None)
    after_run = ctxt.pop('after_run', None)
    post_run = ctxt.pop('post_run', None)
    # Request already past its finish time: record the overtime and stop.
    if finishtime and int(realnow()) >= finishtime:
        asyncrequest.resultcode = manager_common.RESULT_OVER_FINISHTIME
        asyncrequest.result = 'Async request over finish time'
        asyncrequest.status = manager_common.FINISH
        try:
            session.add(asyncrequest)
            session.flush()
        except DBDuplicateEntry:
            # Same request id already recorded; best-effort, just log.
            LOG.warning('Async request record DBDuplicateEntry')
        except DBError as e:
            LOG.error('Async request record DBError %s: %s'
                      % (e.__class__.__name__, e.message))
        return
    # Server shutting down / inactive: refuse and record the failure.
    if not self.is_active:
        asyncrequest.resultcode = manager_common.SCHEDULER_STATUS_ERROR
        asyncrequest.result = 'Rpc server not active now'
        asyncrequest.status = manager_common.FINISH
        session.add(asyncrequest)
        session.flush()
        return
    # Compile hook descriptions into runnable executers; a bad description
    # (missing executer or schema violation) finishes the request as error.
    try:
        if pre_run:
            pre_run = self._compile('pre', pre_run)
        if after_run:
            after_run = self._compile('after', after_run)
        if post_run:
            post_run = self._compile('post', post_run)
    except (KeyError, jsonutils.ValidationError):
        asyncrequest.resultcode = manager_common.SCHEDULER_EXECUTER_ERROR
        asyncrequest.result = 'Rpc server can not find executer or ctxt error'
        asyncrequest.status = manager_common.FINISH
        session.add(asyncrequest)
        session.flush()
        return
    # except Exception:
    #     LOG.exception('wtf')
    #     raise
    # Determine the set of agents whose responses we wait for.
    if rpc_ctxt.get('agents') is None:
        wait_agents = [x[0] for x in
                       model_query(session, Agent.agent_id,
                                   filter=Agent.status > manager_common.DELETED).all()]
    else:
        wait_agents = rpc_ctxt.get('agents')
    rpc_ctxt.update({'request_id': asyncrequest.request_id,
                     'expire': asyncrequest.expire,
                     'finishtime': asyncrequest.finishtime})
    try:
        target = Target(**rpc_target)
        rpc = get_client()
    except Exception:
        LOG.error('Prepare rpc clinet error')
        asyncrequest.resultcode = manager_common.SCHEDULER_PREPARE_ERROR
        asyncrequest.result = 'Rpc server prepare rpc clinet error'
        asyncrequest.status = manager_common.FINISH
        session.add(asyncrequest)
        session.flush()
        return
    # Pre hook runs before the cast; its failure aborts the request.
    if pre_run:
        try:
            pre_run.run(asyncrequest, wait_agents)
        except RpcServerCtxtException as e:
            asyncrequest.resultcode = manager_common.SCHEDULER_EXECUTER_ERROR
            asyncrequest.result = 'Rpc server ctxt pre function fail: %s' % e.message
            asyncrequest.status = manager_common.FINISH
            session.add(asyncrequest)
            session.flush()
            return
    session.add(asyncrequest)
    session.flush()
    LOG.debug('Try cast rpc method %s' % rpc_method)
    try:
        rpc.cast(target, ctxt=rpc_ctxt,
                 msg={'method': rpc_method, 'args': rpc_args})
    except AMQPDestinationNotFound:
        asyncrequest.resultcode = manager_common.SEND_FAIL
        asyncrequest.result = 'Async %s request send fail, AMQPDestinationNotFound' % rpc_method
        asyncrequest.status = manager_common.FINISH
        session.flush()
        return
    LOG.debug('Cast %s to %s success'
              % (asyncrequest.request_id, target.to_dict()))
    # After hook runs once the cast succeeded; its failure is only noted
    # in the result text, it does not abort the request.
    if after_run:
        try:
            after_run.run(asyncrequest, wait_agents)
        except RpcServerCtxtException as e:
            asyncrequest.result = 'Async request %s cast success, ' \
                                  'ctxt after function error~%s' % (rpc_method, e.message)
        else:
            asyncrequest.result = 'Async request %s cast success' % rpc_method
        finally:
            session.flush()
    # Snapshot values the checker closure needs before the session is reused.
    request_id = asyncrequest.request_id
    finishtime = asyncrequest.finishtime
    deadline = asyncrequest.deadline + 1  # one grace second past deadline
    expire = asyncrequest.expire
    # NOTE(review): responses appear to be kept in cache storage when the
    # request has an expire value, otherwise in the DB session — confirm.
    if expire:
        storage = get_cache()
    else:
        storage = session

    def check_respone():
        """Poll for agent responses until finish/deadline, then record misses."""
        wait = finishtime - int(time.time())
        # Wait 3 seconds first so one early check can be made.
        if wait > 3:
            eventlet.sleep(3)
        no_response_agents = set(wait_agents)
        # Poll interval scales with the wait window, clamped to [3, 10].
        interval = int(wait / 10)
        if interval < 3:
            interval = 3
        elif interval > 10:
            interval = 10
        not_overtime = 2
        while True:
            no_response_agents = responeutils.norespones(storage=storage,
                                                         request_id=request_id,
                                                         agents=no_response_agents)
            if not no_response_agents:
                break
            if int(time.time()) < finishtime:
                eventlet.sleep(interval)
            if int(time.time()) > deadline:
                # Allow two extra passes beyond the deadline before giving up.
                not_overtime -= 1
                if not not_overtime:
                    break
            eventlet.sleep(1)
        LOG.debug('Not response agents count %d' % len(no_response_agents))
        # Record an overtime result for every agent that never answered.
        bulk_data = []
        agent_time = int(time.time())
        for agent_id in no_response_agents:
            data = dict(request_id=request_id,
                        agent_id=agent_id,
                        agent_time=agent_time,
                        server_time=agent_time,
                        resultcode=manager_common.RESULT_OVER_FINISHTIME,
                        result='Agent respone overtime')
            bulk_data.append(data)
        responeutils.bluk_insert(storage, no_response_agents, bulk_data, expire)
        asyncrequest.status = manager_common.FINISH
        if no_response_agents:
            asyncrequest.resultcode = manager_common.RESULT_NOT_ALL_SUCCESS
            asyncrequest.result = 'agents not respone, count:%d' % len(no_response_agents)
        else:
            asyncrequest.resultcode = manager_common.RESULT_SUCCESS
            asyncrequest.result = 'all agent respone result'
        session.flush()
        # Post hook sees the final no-response set; errors are appended
        # to the result text only.
        if post_run:
            try:
                post_run.run(asyncrequest, no_response_agents)
            except RpcServerCtxtException as e:
                asyncrequest.result += (' ctxt post function error~%s' % e.message)
                session.flush()
        session.close()

    threadpool.add_thread(safe_func_wrapper, check_respone, LOG)