def __init__(self, manager):
    """
    Build the messaging service around ``manager``.

    Parameters:
        manager: kept on the instance as ``_manager``

    A CriteriaEvaluator, a HelixTaskExecutor and an AsyncCallbackService are
    created (in that order); the callback service is then registered with
    the executor as the handler factory for TASK_REPLY messages.
    """
    self._manager = manager
    self._evaluator = CriteriaEvaluator()

    taskExecutor = HelixTaskExecutor()
    self._taskExecutor = taskExecutor

    callbackService = AsyncCallbackService()
    self._asyncCallbackService = callbackService

    replyType = MessageType.toString(MessageType.TASK_REPLY)
    taskExecutor.registerMessageHandlerFactory(replyType, callbackService)

    # Java original: ConcurrentHashMap<String, MessageHandlerFactory>
    self._messageHandlerFactoriestobeAdded = {}
def scheduleTask(self, message, handler, notificationContext):
    """
    Submit a HelixTask for ``message`` unless one is already tracked.

    Parameters:
        message: Message to handle; its msg id and partition name form the
            task id ``<msgId>/<partitionName>``
        handler: MessageHandler passed through to the HelixTask
        notificationContext: NotificationContext; its manager supplies the
            HelixDataAccessor used for status updates

    Returns:
        void

    The whole operation runs while holding ``self._lock``. The lock is
    released in a ``finally`` clause: only KeyboardInterrupt is caught
    below, so any other exception would otherwise leave the lock held.
    """
    # assert (handler != None)
    # Java original: synchronized (self._lock)
    self._lock.acquire()
    try:
        taskId = message.getMsgId() + "/" + message.getPartitionName()

        # State transitions may need a per-resource thread pool set up first.
        if message.getMsgType() == MessageType.toString(MessageType.STATE_TRANSITION):
            self.checkResourceConfig(message.getResourceName(),
                                     notificationContext.getManager())

        self.LOG.info("Scheduling message: " + taskId)
        self._statusUpdateUtil.logInfo(
            message, HelixTaskExecutor, "Message handling task scheduled",
            notificationContext.getManager().getHelixDataAccessor())

        task = HelixTask(message, notificationContext, handler, self)
        if taskId not in self._taskMap:
            self.LOG.info("Message:" + taskId + " handling task scheduled")
            # Future<HelixTaskResult>
            future = self.findExecutorServiceForMsg(message).submit(task.call)
            self._taskMap[taskId] = future
        else:
            self._statusUpdateUtil.logWarning(
                message, HelixTaskExecutor,
                "Message handling task already sheduled for " + taskId,
                notificationContext.getManager().getHelixDataAccessor())
    except KeyboardInterrupt as e:
        # TODO: widen to ``except Exception`` once enabled upstream
        self.LOG.error("Error while executing task." + str(message) + str(e))
        self._statusUpdateUtil.logError(
            message, HelixTaskExecutor, e,
            "Error while executing task " + str(e),
            notificationContext.getManager().getHelixDataAccessor())
    finally:
        self._lock.release()
def start(self):
    """
    Create a PARTICIPANT manager, register a state model factory and connect.

    Returns:
        void

    Raises:
        ValueError: ``self.stateModelType`` is not MasterSlave, OnlineOffline
            or LeaderStandby (compared case-insensitively). Previously this
            case fell through with ``stateModelFactory`` unbound.
        Exception: whatever the manager raises while connecting

    A ZooKeeper-backed manager is used when ``self._file`` is None, otherwise
    a static-file manager built from ``self._file``.
    """
    # Pick the factory first so an unsupported type fails before any manager
    # is created.
    modelType = self.stateModelType.upper()
    if modelType == "MasterSlave".upper():
        stateModelFactory = MasterSlaveStateModelFactory(self.delay)
    elif modelType == "OnlineOffline".upper():
        stateModelFactory = OnlineOfflineStateModelFactory(self.delay)
    elif modelType == "LeaderStandby".upper():
        stateModelFactory = LeaderStandbyStateModelFactory(self.delay)
    else:
        raise ValueError("Unsupported state model type: "
                         + str(self.stateModelType))

    if self._file is None:
        manager = HelixManagerFactory.getZKHelixManager(
            self.clusterName, self.instanceName, InstanceType.PARTICIPANT,
            self.zkConnectString)
    else:
        manager = HelixManagerFactory.getStaticFileHelixManager(
            self.clusterName, self.instanceName, InstanceType.PARTICIPANT,
            self._file)

    # StateMachineEngine
    stateMach = manager.getStateMachineEngine()
    stateMach.registerStateModelFactory(self.stateModelType, stateModelFactory)
    manager.connect()
    manager.getMessagingService().registerMessageHandlerFactory(
        MessageType.toString(MessageType.STATE_TRANSITION), stateMach)
def start(self):
    """
    Create a PARTICIPANT manager, register a state model factory and connect.

    Returns:
        void

    Raises:
        ValueError: ``self.stateModelType`` is not MasterSlave, OnlineOffline
            or LeaderStandby (compared case-insensitively). Previously this
            case fell through with ``stateModelFactory`` unbound.
        Exception: whatever the manager raises while connecting

    A ZooKeeper-backed manager is used when ``self._file`` is None, otherwise
    a static-file manager built from ``self._file``.
    """
    # Pick the factory first so an unsupported type fails before any manager
    # is created.
    modelType = self.stateModelType.upper()
    if modelType == "MasterSlave".upper():
        stateModelFactory = MasterSlaveStateModelFactory(self.delay)
    elif modelType == "OnlineOffline".upper():
        stateModelFactory = OnlineOfflineStateModelFactory(self.delay)
    elif modelType == "LeaderStandby".upper():
        stateModelFactory = LeaderStandbyStateModelFactory(self.delay)
    else:
        raise ValueError("Unsupported state model type: "
                         + str(self.stateModelType))

    if self._file is None:
        manager = HelixManagerFactory.getZKHelixManager(
            self.clusterName, self.instanceName, InstanceType.PARTICIPANT,
            self.zkConnectString)
    else:
        manager = HelixManagerFactory.getStaticFileHelixManager(
            self.clusterName, self.instanceName, InstanceType.PARTICIPANT,
            self._file)

    # StateMachineEngine
    stateMach = manager.getStateMachineEngine()
    stateMach.registerStateModelFactory(self.stateModelType, stateModelFactory)
    manager.connect()
    manager.getMessagingService().registerMessageHandlerFactory(
        MessageType.toString(MessageType.STATE_TRANSITION), stateMach)
def checkResourceConfig(self, resourceName, manager):
    """
    Set up a per-resource thread pool the first time a resource is seen.

    Parameters:
        resourceName: String, resource whose configuration is checked
        manager: HelixManager providing the ConfigAccessor and cluster name

    Returns:
        void

    If ``resourceName`` is already in ``_resourceThreadpoolSizeMap`` nothing
    happens. Otherwise the HelixTaskExecutor.MAX_THREADS value configured for
    the resource is read; when it parses to a positive int, a
    ThreadPoolExecutor of that size is stored in ``_threadpoolMap`` under
    ``<STATE_TRANSITION type>.<resourceName>``. The resolved size (-1 when
    absent or unparsable) is always recorded so the lookup happens once.
    """
    if resourceName in self._resourceThreadpoolSizeMap:
        return

    threadpoolSize = -1
    # ConfigAccessor
    configAccessor = manager.getConfigAccessor()
    if configAccessor is not None:
        # ConfigScope
        scope = ConfigScopeBuilder().forCluster(
            manager.getClusterName()).forResource(resourceName).build()
        threadpoolSizeStr = configAccessor.get(scope,
                                               HelixTaskExecutor.MAX_THREADS)
        if threadpoolSizeStr is not None:
            try:
                threadpoolSize = int(threadpoolSizeStr)
            except (TypeError, ValueError) as e:
                # A malformed value is logged and treated as "not configured".
                self.LOG.error("" + str(e))

    if threadpoolSize > 0:
        key = (MessageType.toString(MessageType.STATE_TRANSITION)
               + "." + resourceName)
        self._threadpoolMap[key] = futures.ThreadPoolExecutor(threadpoolSize)
        self.LOG.info("Adding per resource threadpool for resource "
                      + resourceName + " with size " + str(threadpoolSize))

    self._resourceThreadpoolSizeMap[resourceName] = threadpoolSize
def getMessageType(self):
    """
    Report the message type handled here.

    Returns:
        String: the string form of MessageType.STATE_TRANSITION

    @Override
    """
    handledType = MessageType.STATE_TRANSITION
    return MessageType.toString(handledType)
def getMessageType(self):
    """
    Report the message type handled here.

    Returns:
        String: the string form of MessageType.TASK_REPLY

    @Override
    """
    handledType = MessageType.TASK_REPLY
    return MessageType.toString(handledType)
def __init__(self, manager):
    """
    Build the messaging service around ``manager``.

    Parameters:
        manager: kept on the instance as ``_manager``

    A CriteriaEvaluator, a HelixTaskExecutor and an AsyncCallbackService are
    created (in that order); the callback service is then registered with
    the executor as the handler factory for TASK_REPLY messages.
    """
    self._manager = manager
    self._evaluator = CriteriaEvaluator()

    taskExecutor = HelixTaskExecutor()
    self._taskExecutor = taskExecutor

    callbackService = AsyncCallbackService()
    self._asyncCallbackService = callbackService

    replyType = MessageType.toString(MessageType.TASK_REPLY)
    taskExecutor.registerMessageHandlerFactory(replyType, callbackService)

    # Java original: ConcurrentHashMap<String, MessageHandlerFactory>
    self._messageHandlerFactoriestobeAdded = {}
def getStatusUpdateSubPath(self, message):
    """
    Choose the status-update sub path for ``message``.

    Parameters:
        message: Message

    Returns:
        String: the resource name when the message type equals
        STATE_TRANSITION (case-insensitive), otherwise the message type.
    """
    msgTypeUpper = message.getMsgType().upper()
    transitionUpper = MessageType.toString(MessageType.STATE_TRANSITION).upper()
    if msgTypeUpper != transitionUpper:
        return message.getMsgType()
    return message.getResourceName()
def getStatusUpdateRecordName(self, message):
    """
    Choose the status-update record name for ``message``.

    Parameters:
        message: Message

    Returns:
        String: ``<tgtSessionId>__<resourceName>`` when the message type
        equals STATE_TRANSITION (case-insensitive), otherwise the message id.
    """
    msgTypeUpper = message.getMsgType().upper()
    transitionUpper = MessageType.toString(MessageType.STATE_TRANSITION).upper()
    if msgTypeUpper == transitionUpper:
        recordName = message.getTgtSessionId() + "__" + message.getResourceName()
    else:
        recordName = message.getMsgId()
    return recordName
def getStatusUpdateKey(self, message):
    """
    Choose the status-update key for ``message``.

    Parameters:
        message: Message

    Returns:
        String: the partition name when the message type equals
        STATE_TRANSITION (case-insensitive), otherwise the message id.

    Java modifiers:
        private
    """
    msgTypeUpper = message.getMsgType().upper()
    transitionUpper = MessageType.toString(MessageType.STATE_TRANSITION).upper()
    if msgTypeUpper == transitionUpper:
        updateKey = message.getPartitionName()
    else:
        updateKey = message.getMsgId()
    return updateKey
def scheduleTask(self, message, handler, notificationContext):
    """
    Submit a HelixTask for ``message`` unless one is already tracked.

    Parameters:
        message: Message to handle; its msg id and partition name form the
            task id ``<msgId>/<partitionName>``
        handler: MessageHandler passed through to the HelixTask
        notificationContext: NotificationContext; its manager supplies the
            HelixDataAccessor used for status updates

    Returns:
        void

    The whole operation runs while holding ``self._lock``. The lock is
    released in a ``finally`` clause: only KeyboardInterrupt is caught
    below, so any other exception would otherwise leave the lock held.
    """
    # assert (handler != None)
    # Java original: synchronized (self._lock)
    self._lock.acquire()
    try:
        taskId = message.getMsgId() + "/" + message.getPartitionName()

        # State transitions may need a per-resource thread pool set up first.
        if message.getMsgType() == MessageType.toString(MessageType.STATE_TRANSITION):
            self.checkResourceConfig(message.getResourceName(),
                                     notificationContext.getManager())

        self.LOG.info("Scheduling message: " + taskId)
        self._statusUpdateUtil.logInfo(
            message, HelixTaskExecutor, "Message handling task scheduled",
            notificationContext.getManager().getHelixDataAccessor())

        task = HelixTask(message, notificationContext, handler, self)
        if taskId not in self._taskMap:
            self.LOG.info("Message:" + taskId + " handling task scheduled")
            # Future<HelixTaskResult>
            future = self.findExecutorServiceForMsg(message).submit(task.call)
            self._taskMap[taskId] = future
        else:
            self._statusUpdateUtil.logWarning(
                message, HelixTaskExecutor,
                "Message handling task already sheduled for " + taskId,
                notificationContext.getManager().getHelixDataAccessor())
    except KeyboardInterrupt as e:
        # TODO: widen to ``except Exception`` once enabled upstream
        self.LOG.error("Error while executing task." + str(message) + str(e))
        self._statusUpdateUtil.logError(
            message, HelixTaskExecutor, e,
            "Error while executing task " + str(e),
            notificationContext.getManager().getHelixDataAccessor())
    finally:
        self._lock.release()
def checkResourceConfig(self, resourceName, manager):
    """
    Set up a per-resource thread pool the first time a resource is seen.

    Parameters:
        resourceName: String, resource whose configuration is checked
        manager: HelixManager providing the ConfigAccessor and cluster name

    Returns:
        void

    If ``resourceName`` is already in ``_resourceThreadpoolSizeMap`` nothing
    happens. Otherwise the HelixTaskExecutor.MAX_THREADS value configured for
    the resource is read; when it parses to a positive int, a
    ThreadPoolExecutor of that size is stored in ``_threadpoolMap`` under
    ``<STATE_TRANSITION type>.<resourceName>``. The resolved size (-1 when
    absent or unparsable) is always recorded so the lookup happens once.
    """
    if resourceName in self._resourceThreadpoolSizeMap:
        return

    threadpoolSize = -1
    # ConfigAccessor
    configAccessor = manager.getConfigAccessor()
    if configAccessor is not None:
        # ConfigScope
        scope = ConfigScopeBuilder().forCluster(
            manager.getClusterName()).forResource(resourceName).build()
        threadpoolSizeStr = configAccessor.get(scope,
                                               HelixTaskExecutor.MAX_THREADS)
        if threadpoolSizeStr is not None:
            try:
                threadpoolSize = int(threadpoolSizeStr)
            except (TypeError, ValueError) as e:
                # A malformed value is logged and treated as "not configured".
                self.LOG.error("" + str(e))

    if threadpoolSize > 0:
        key = (MessageType.toString(MessageType.STATE_TRANSITION)
               + "." + resourceName)
        self._threadpoolMap[key] = futures.ThreadPoolExecutor(threadpoolSize)
        self.LOG.info("Adding per resource threadpool for resource "
                      + resourceName + " with size " + str(threadpoolSize))

    self._resourceThreadpoolSizeMap[resourceName] = threadpoolSize
def findExecutorServiceForMsg(self, message):
    """
    Pick the executor that should run ``message``.

    Parameters:
        message: Message

    Returns:
        ExecutorService: the pool stored under ``<msgType>.<resourceName>``
        when the message is a STATE_TRANSITION, has a resource name and such
        a pool exists; otherwise whatever ``_threadpoolMap`` holds for the
        message type (possibly None).
    """
    pools = self._threadpoolMap
    fallback = pools.get(message.getMsgType())

    if not (message.getMsgType() == MessageType.toString(MessageType.STATE_TRANSITION)):
        return fallback

    resourceName = message.getResourceName()
    if resourceName is None:
        return fallback

    perResourceKey = message.getMsgType() + "." + resourceName
    if perResourceKey not in pools:
        return fallback

    self.LOG.info("Find per-resource thread pool with key " + perResourceKey)
    return pools.get(perResourceKey)
def findExecutorServiceForMsg(self, message):
    """
    Pick the executor that should run ``message``.

    Parameters:
        message: Message

    Returns:
        ExecutorService: the pool stored under ``<msgType>.<resourceName>``
        when the message is a STATE_TRANSITION, has a resource name and such
        a pool exists; otherwise whatever ``_threadpoolMap`` holds for the
        message type (possibly None).
    """
    pools = self._threadpoolMap
    fallback = pools.get(message.getMsgType())

    if not (message.getMsgType() == MessageType.toString(MessageType.STATE_TRANSITION)):
        return fallback

    resourceName = message.getResourceName()
    if resourceName is None:
        return fallback

    perResourceKey = message.getMsgType() + "." + resourceName
    if perResourceKey not in pools:
        return fallback

    self.LOG.info("Find per-resource thread pool with key " + perResourceKey)
    return pools.get(perResourceKey)
def createHandler(self, message, context):
    """
    Build the state-transition handler for ``message``.

    Parameters:
        message: Message, must be of type STATE_TRANSITION
        context: NotificationContext; the task executor is read from it
            under NotificationContext.TASK_EXECUTOR_KEY

    Returns:
        MessageHandler: a HelixStateTransitionHandler, or None when the
        message carries no stateModelDef or no matching state model factory
        is registered.

    Raises:
        HelixException: the message is not a STATE_TRANSITION, or the state
            model definition cannot be loaded through the data accessor.

    @Override
    """
    msgType = message.getMsgType()
    if not (msgType == MessageType.toString(MessageType.STATE_TRANSITION)):
        raise HelixException("Unexpected msg type for message "
                             + message.getMsgId()
                             + " type:" + message.getMsgType())

    partitionKey = message.getPartitionName()
    stateModelName = message.getStateModelDef()
    resourceName = message.getResourceName()
    sessionId = message.getTgtSessionId()
    bucketSize = message.getBucketSize()

    if stateModelName is None:
        self.logger.error("message does not contain stateModelDef")
        return None

    factoryName = message.getStateModelFactoryName()
    if factoryName is None:
        factoryName = HelixConstants.DEFAULT_STATE_MODEL_FACTORY

    stateModelFactory = self.getStateModelFactory(stateModelName, factoryName)
    if stateModelFactory is None:
        self.logger.warn("Cannot find stateModelFactory for model:"
                         + stateModelName
                         + " using factoryName:" + factoryName
                         + " for resourceGroup:" + resourceName)
        return None

    # Load and remember the state model definition on first use.
    if stateModelName not in self._stateModelDefs:
        dataAccessor = self._manager.getHelixDataAccessor()
        builder = dataAccessor.keyBuilder()
        loadedDef = dataAccessor.getProperty(
            builder.stateModelDef(stateModelName))
        if loadedDef is None:
            raise HelixException("stateModelDef for " + stateModelName
                                 + " does NOT exists")
        self._stateModelDefs[stateModelName] = loadedDef

    initState = self._stateModelDefs.get(
        message.getStateModelDef()).getInitialState()

    # Create the partition's state model on demand, seeded with initState.
    stateModel = stateModelFactory.getStateModel(partitionKey)
    if stateModel is None:
        stateModelFactory.createAndAddStateModel(partitionKey)
        stateModel = stateModelFactory.getStateModel(partitionKey)
        stateModel.updateState(initState)

    currentStateDelta = CurrentState(resourceName)
    currentStateDelta.setSessionId(sessionId)
    currentStateDelta.setStateModelDefRef(stateModelName)
    currentStateDelta.setStateModelFactoryName(factoryName)
    currentStateDelta.setBucketSize(bucketSize)
    # Fall back to initState when the model reports no current state.
    reportedState = ternary(stateModel.getCurrentState() is None,
                            initState,
                            stateModel.getCurrentState())
    currentStateDelta.setState(partitionKey, reportedState)

    # HelixTaskExecutor
    executor = context.get(NotificationContext.TASK_EXECUTOR_KEY)
    return HelixStateTransitionHandler(stateModel, message, context,
                                       currentStateDelta, executor)
def onMessage(self, instanceName, messages, changeContext):
    """
    React to a message notification for ``instanceName``.

    Parameters:
        instanceName: String
        messages: List<Message>, sorted in place by creation time
        changeContext: NotificationContext

    Returns:
        void

    On a FINALIZE notification every handler factory is reset, every tracked
    future is cancelled and the task map is cleared. Otherwise each message
    is examined in creation order: NO_OP messages and messages addressed to
    another session are removed, messages not in state NEW are skipped,
    handlers are created for the rest, and the message is marked READ.
    Metadata for current states that do not exist yet is collected for
    non-controller STATE_TRANSITION messages.

    NOTE(review): ``handlers``, ``readMsgs``, ``createCurStateKeys`` and
    ``metaCurStates`` are filled here but not consumed within this block —
    confirm where they are used.

    @Override
    """
    if changeContext.getType() == NotificationContext.Type.FINALIZE:
        self.LOG.info("Get FINALIZE notification")
        for factory in self._handlerFactoryMap.values():
            factory.reset()
        # Future<HelixTaskResult>
        for pending in self._taskMap.values():
            pending.cancel(True)
        self._taskMap.clear()
        return

    manager = changeContext.getManager()
    accessor = manager.getHelixDataAccessor()
    keyBuilder = accessor.keyBuilder()

    if not messages:
        self.LOG.info("No Messages to process")
        return

    # Earlier-created messages are processed first.
    messages.sort(Message.CREATE_TIME_COMPARATOR)

    handlers = []            # List<MessageHandler>
    readMsgs = []            # List<Message>
    sessionId = manager.getSessionId()
    curResourceNames = accessor.getChildNames(
        keyBuilder.currentStates(instanceName, sessionId))
    createCurStateKeys = []  # List<PropertyKey>
    metaCurStates = []       # List<CurrentState>
    createCurStateNames = set()

    changeContext.add(NotificationContext.TASK_EXECUTOR_KEY, self)

    for message in messages:
        # NO_OP messages are simply dropped.
        if message.getMsgType().upper() == MessageType.toString(MessageType.NO_OP).upper():
            self.LOG.info("Dropping NO-OP message. mid: " + message.getId()
                          + ", from: " + message.getMsgSrc())
            accessor.removeProperty(message.getKey(keyBuilder, instanceName))
            continue

        # Messages targeted at a different session are removed.
        tgtSessionId = message.getTgtSessionId()
        if not (sessionId == tgtSessionId or tgtSessionId == "*"):
            warningMessage = ("SessionId does NOT match. expected sessionId: "
                              + sessionId
                              + ", tgtSessionId in message: " + tgtSessionId
                              + ", messageId: " + message.getMsgId())
            self.LOG.warn(warningMessage)
            accessor.removeProperty(message.getKey(keyBuilder, instanceName))
            self._statusUpdateUtil.logWarning(
                message, HelixStateMachineEngine, warningMessage, accessor)
            continue

        if MessageState.NEW != message.getMsgState():
            self.LOG.trace("Message already read. mid: " + message.getMsgId())
            continue

        # When no handler is produced the message is left in state NEW.
        try:
            createHandlers = self.createMessageHandlers(message, changeContext)
            if not createHandlers:
                continue
            handlers.extend(createHandlers)
        except KeyboardInterrupt as e:
            # TODO: enable ``except Exception`` here?
            error = ("Failed to create message handler for "
                     + message.getMsgId() + ", exception: " + str(e))
            self._statusUpdateUtil.logError(
                message, HelixStateMachineEngine, e, error, accessor)
            message.setMsgState(MessageState.UNPROCESSABLE)
            accessor.removeProperty(message.getKey(keyBuilder, instanceName))
            # TODO: dumping the unprocessable message record to the error log
            # is disabled for now.
            continue

        message.setMsgState(MessageState.READ)
        message.setReadTimeStamp(time.time())
        message.setExecuteSessionId(changeContext.getManager().getSessionId())
        self._statusUpdateUtil.logInfo(
            message, HelixStateMachineEngine, "New Message", accessor)
        readMsgs.append(message)

        # Current-state metadata is only prepared for non-controller
        # state-transition messages.
        if message.isControlerMsg():
            continue
        if not (message.getMsgType() == MessageType.toString(MessageType.STATE_TRANSITION)):
            continue

        resourceName = message.getResourceName()
        if resourceName in curResourceNames or resourceName in createCurStateNames:
            continue

        createCurStateNames.add(resourceName)
        createCurStateKeys.append(
            keyBuilder.currentState(instanceName, sessionId, resourceName))

        metaCurState = CurrentState(resourceName)
        metaCurState.setBucketSize(message.getBucketSize())
        metaCurState.setStateModelDefRef(message.getStateModelDef())
        metaCurState.setSessionId(sessionId)
        metaCurState.setGroupMessageMode(message.getGroupMessageMode())
        ftyName = message.getStateModelFactoryName()
        if ftyName is None:
            metaCurState.setStateModelFactoryName(
                HelixConstants.DEFAULT_STATE_MODEL_FACTORY)
        else:
            metaCurState.setStateModelFactoryName(ftyName)
        metaCurStates.append(metaCurState)
class ZKHelixManager(HelixManager): """ Java modifiers: private static Type: Logger """ logger = get_logger(__name__) """ Java modifiers: private final static Type: int """ RETRY_LIMIT = 3 """ Java modifiers: private final static Type: int """ CONNECTIONTIMEOUT = 60 * 1000 """ Java modifiers: private final static Type: int """ DEFAULT_SESSION_TIMEOUT = 30 * 1000 """ Parameters: String clusterName String instanceName InstanceType instanceType String zkConnectString Throws: Exception """ def __init__(self, clusterName, instanceName, instanceType, zkConnectString): self.logger.info("Create a zk-based cluster manager. clusterName:" + clusterName + ", instanceName:" + instanceName + ", type:" + InstanceType.toString(instanceType) + ", zkSvr:" + zkConnectString) # print("Create a zk-based cluster manager. clusterName:" + clusterName + ", instanceName:" + instanceName + ", type:" + InstanceType.toString(instanceType) + ", zkSvr:" + zkConnectString) # int sessionTimeoutInt = -1 sessionTimeoutInt = self.DEFAULT_SESSION_TIMEOUT # try: # sessionTimeoutInt = int(env["zk.session.timeout", "" + self.DEFAULT_SESSION_TIMEOUT)) # except ValueError, e: # self.logger.warn("Exception while parsing session timeout: " + System.getProperty("zk.session.timeout", "" + self.DEFAULT_SESSION_TIMEOUT)) if sessionTimeoutInt > 0: self._sessionTimeout = sessionTimeoutInt else: self._sessionTimeout = self.DEFAULT_SESSION_TIMEOUT if instanceName == None: # try: instanceName = socket.gethostbyname( socket.gethostname()) + "-" + instanceType.toString() # except UnknownHostException, e: # self.logger.info("Unable to get host name. 
Will set it to UNKNOWN, mostly ignorable", e) # instanceName = "UNKNOWN" self._clusterName = clusterName self._instanceName = instanceName self._instanceType = instanceType self._zkConnectString = zkConnectString self._zkStateChangeListener = ZkStateChangeListener(self) self._timer = None # self._handlers = ArrayList<CallbackHandler>() self._handlers = [] self._messagingService = DefaultMessagingService(self) # TODO: fix ther version #self._version = PropertiesReader("cluster-manager-version.properties").getProperty("clustermanager.version") self._version = '0.5.1' self._stateMachEngine = HelixStateMachineEngine(self) # self._controllerTimerTasks = ArrayList<HelixTimerTask>() self._controllerTimerTasks = [] if self._instanceType == InstanceType.CONTROLLER: raise HelixException( "ERROR. Cannot handle CONTROLLER type for now!") # self._controllerTimerTasks.add(HealthStatsAggregationTask(self)) self._preConnectCallbacks = [] def isInstanceSetup(self): """ Returns boolean Java modifiers: private """ if self._instanceType == InstanceType.PARTICIPANT or self._instanceType == InstanceType.CONTROLLER_PARTICIPANT: # boolean isValid = self._zkClient.exists(PropertyPathConfig.getPath(PropertyType.CONFIGS, self._clusterName, ConfigScopeProperty.toString(ConfigScopeProperty.PARTICIPANT), self._instanceName)) \ and self._zkClient.exists(PropertyPathConfig.getPath(PropertyType.MESSAGES, self._clusterName, self._instanceName)) \ and self._zkClient.exists(PropertyPathConfig.getPath(PropertyType.CURRENTSTATES, self._clusterName, self._instanceName)) \ and self._zkClient.exists(PropertyPathConfig.getPath(PropertyType.STATUSUPDATES, self._clusterName, self._instanceName)) \ and self._zkClient.exists(PropertyPathConfig.getPath(PropertyType.ERRORS, self._clusterName, self._instanceName)) return isValid return True def addIdealStateChangeListener(self, listener): """ Returns void Parameters: listener: IdealStateChangeListener @Override Throws: Exception """ 
self.logger.info("ClusterManager.addIdealStateChangeListener()") self.checkConnected() # String path = PropertyPathConfig.getPath(PropertyType.IDEALSTATES, self._clusterName) # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeDataChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.IDEAL_STATE) self.addListener(callbackHandler) def addLiveInstanceChangeListener(self, listener): """ Returns void Parameters: listener: LiveInstanceChangeListener @Override Throws: Exception """ self.logger.info("ClusterManager.addLiveInstanceChangeListener()") self.checkConnected() # String path = self._helixAccessor.keyBuilder().liveInstances().getPath() # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeDataChanged, ZKEventType.NodeChildrenChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.LIVE_INSTANCE) self.addListener(callbackHandler) def addConfigChangeListener(self, listener): pass # """ # Returns void # Parameters: # listener: ConfigChangeListener # @Override # # # """ # self.logger.info("ClusterManager.addConfigChangeListener()") # self.checkConnected() # # String # path = PropertyPathConfig.getPath(PropertyType.CONFIGS, self._clusterName, ConfigScopeProperty.PARTICIPANT.toString()) # # CallbackHandler # callbackHandler = self.createCallBackHandler(path, listener, [ZKEventType.NodeChildrenChanged ], ChangeType.CONFIG) # self.addListener(callbackHandler) def addMessageListener(self, listener, instanceName): """ Returns void Parameters: listener: MessageListenerinstanceName: String @Override """ self.logger.info("ClusterManager.addMessageListener() " + instanceName) self.checkConnected() # String path = self._helixAccessor.keyBuilder().messages( instanceName).getPath() # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeChildrenChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], 
ChangeType.MESSAGE) self.addListener(callbackHandler) def addControllerMessageListener(self, listener): """ Returns void Parameters: listener: MessageListener """ self.logger.info("ClusterManager.addControllerMessageListener()") self.checkConnected() # String path = self._helixAccessor.keyBuilder().controllerMessages().getPath() # CallbackHandler callbackHandler = self.createCallBackHandler( path, listener, [ ZKEventType.NodeChildrenChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.MESSAGES_CONTROLLER) self.addListener(callbackHandler) def addCurrentStateChangeListener(self, listener, instanceName, sessionId): """ Returns void Parameters: listener: CurrentStateChangeListenerinstanceName: StringsessionId: String @Override """ self.logger.info("ClusterManager.addCurrentStateChangeListener() " + instanceName + " " + sessionId) self.checkConnected() # String path = self._helixAccessor.keyBuilder().currentStates( instanceName, sessionId).getPath() # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeChildrenChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.CURRENT_STATE) self.addListener(callbackHandler) def addHealthStateChangeListener(self, listener, instanceName): """ Returns void Parameters: listener: HealthStateChangeListenerinstanceName: String @Override """ self.logger.info("ClusterManager.addHealthStateChangeListener()" + instanceName) self.checkConnected() # String path = self._helixAccessor.keyBuilder().healthReports( instanceName).getPath() # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeChildrenChanged, ZKEventType.NodeDataChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.HEALTH) self.addListener(callbackHandler) def addExternalViewChangeListener(self, listener): """ Returns void Parameters: listener: ExternalViewChangeListener @Override """ 
self.logger.info("ClusterManager.addExternalViewChangeListener()") self.checkConnected() # String path = self._helixAccessor.keyBuilder().externalViews().getPath() # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeDataChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.EXTERNAL_VIEW) self.addListener(callbackHandler) def getDataAccessor(self): """ Returns DataAccessor @Override """ self.checkConnected() return self._accessor def getHelixDataAccessor(self): """ Returns HelixDataAccessor @Override """ self.checkConnected() return self._helixAccessor def getConfigAccessor(self): """ Returns ConfigAccessor @Override """ self.checkConnected() return self._configAccessor def getClusterName(self): """ Returns String @Override """ return self._clusterName def getInstanceName(self): """ Returns String @Override """ return self._instanceName def connect(self): """ Returns void @Override Throws: Exception """ self.logger.info("ClusterManager.connect()") if self._zkStateChangeListener.isConnected(): self.logger.warn("Cluster manager " + self._clusterName + " " + self._instanceName + " already connected") return # temp self.createClient(self._zkConnectString) self._messagingService.onConnected() # try: # self.createClient(self._zkConnectString) # self._messagingService.onConnected() # except Exception, e: # self.logger.error(e) # self.disconnect() # raise e def disconnect(self): """ Returns void @Override """ if not self.isConnected(): self.logger.warn("ClusterManager " + self._instanceName + " already disconnected") return self.logger.info("disconnect " + self._instanceName + "(" + self._instanceType + ") from " + self._clusterName) self._messagingService.getExecutor().shutDown() self.resetHandlers() self._helixAccessor.shutdown() if self._leaderElectionHandler != None: self._leaderElectionHandler.reset() if self._participantHealthCheckInfoCollector != None: self._participantHealthCheckInfoCollector.stop() if 
self._timer != None: self._timer.cancel() self._timer = None if self._instanceType == InstanceType.CONTROLLER: self.stopTimerTasks() if self._propertyStore != None: self._propertyStore.stop() self._zkClient.unsubscribeAll() self._zkClient.close() self._zkStateChangeListener.disconnect() self.logger.info("Cluster manager: " + self._instanceName + " disconnected") def getSessionId(self): """ Returns String @Override """ self.checkConnected() return self._sessionId def isConnected(self): """ Returns boolean @Override """ return self._zkStateChangeListener.isConnected() def getLastNotificationTime(self): """ Returns long @Override """ return -1 def addControllerListener(self, listener): """ Returns void Parameters: listener: ControllerChangeListener @Override """ self.checkConnected() # String path = self._helixAccessor.keyBuilder().controller().getPath() self.logger.info("Add controller listener at: " + path) # CallbackHandler callbackHandler = self.createCallBackHandler(path, listener, [ ZKEventType.NodeChildrenChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.CONTROLLER) self.addListener(callbackHandler) def removeListener(self, listener): """ Returns boolean Parameters: listener: Object @Override """ self.logger.info("remove listener: " + listener + " from " + self._instanceName) # synchronized (self) # Iterator<CallbackHandler> # iterator = self._handlers.iterator() newHandlers = [] for handler in self._handlers: # CallbackHandler # handler = iterator.next() if (handler.getListener() == listener): handler.reset() else: newHandlers.append(handler) self._handlers = newHandlers return True def addLiveInstance(self): """ Returns void Java modifiers: private """ # LiveInstance liveInstance = LiveInstance(self._instanceName) liveInstance.setSessionId(self._sessionId) liveInstance.setHelixVersion(self._version) liveInstance.setLiveInstance("%s@%s" % (os.getpid(), socket.gethostname())) # 
liveInstance.setLiveInstance(ManagementFactory.getRuntimeMXBean().getName()) self.logger.info("Add live instance: InstanceName: " + self._instanceName + " Session id:" + str(self._sessionId)) # Builder keyBuilder = self._helixAccessor.keyBuilder() if not self._helixAccessor.createProperty( keyBuilder.liveInstance(self._instanceName), liveInstance): # String errorMsg = "Fail to create live instance node after waiting, so quit. instance:" + self._instanceName self.logger.warn(errorMsg) # TODO: still raise # raise HelixException(errorMsg) # String currentStatePathParent = PropertyPathConfig.getPath( PropertyType.CURRENTSTATES, self._clusterName, self._instanceName, self.getSessionId()) if not self._zkClient.exists(currentStatePathParent): self._zkClient.createPersistent(currentStatePathParent) self.logger.info("Creating current state path " + currentStatePathParent) def startStatusUpdatedumpTask(self): """ Returns void Java modifiers: private """ # long initialDelay = 30 * 60 * 1000 # long period = 120 * 60 * 1000 # int timeThresholdNoChange = 180 * 60 * 1000 # TODO: comment out for now # if self._timer == None: # self._timer = Timer(True) # self._timer.scheduleAtFixedRate(ZKPathDataDumpTask(self, self._zkClient, timeThresholdNoChange), initialDelay, period) def createClient(self, zkServers): """ Returns void Parameters: zkServers: String Java modifiers: private Throws: Exception """ # String propertyStorePath = PropertyPathConfig.getPath( PropertyType.PROPERTYSTORE, self._clusterName) # PathBasedZkSerializer zkSerializer = ChainedPathZkSerializer.builder( ZNRecordStreamingSerializer()).serialize( propertyStorePath, ByteArraySerializer()).build() # this does notwork, need ot use the path # zkSerializer = ByteArraySerializer() # zkSerializer = BasicZkSerializer(ByteArraySerializer()) self._zkClient = ZkClient(zkServers, self._sessionTimeout, self.CONNECTIONTIMEOUT, zkSerializer) # dzhang: not there in latest version # self._accessor = ZKDataAccessor(self._clusterName, 
self._zkClient) # ZkBaseDataAccessor<ZNRecord> # baseDataAccessor = ZkBaseDataAccessor<ZNRecord>(self._zkClient) baseDataAccessor = ZkBaseDataAccessor(self._zkClient) if self._instanceType == InstanceType.PARTICIPANT: # String curStatePath = PropertyPathConfig.getPath( PropertyType.CURRENTSTATES, self._clusterName, self._instanceName) # TODO: handle cache later self._baseDataAccessor = baseDataAccessor # self._baseDataAccessor = ZkCacheBaseDataAccessor<ZNRecord>(baseDataAccessor, curStatePath) # self._baseDataAccessor = ZkCacheBaseDataAccessor<ZNRecord>(baseDataAccessor, Arrays.asList(curStatePath)) else: if self._instanceType == InstanceType.CONTROLLER: # String extViewPath = PropertyPathConfig.getPath( PropertyType.EXTERNALVIEW, self._clusterName) # TODO: handle cache later self._baseDataAccessor = baseDataAccessor # self._baseDataAccessor = ZkCacheBaseDataAccessor<ZNRecord>(baseDataAccessor, extViewPath) # self._baseDataAccessor = ZkCacheBaseDataAccessor<ZNRecord>(baseDataAccessor, Arrays.asList(extViewPath)) else: self._baseDataAccessor = baseDataAccessor self._helixAccessor = ZKHelixDataAccessor(self._clusterName, self._instanceType, self._baseDataAccessor) self._configAccessor = ConfigAccessor(self._zkClient) # int retryCount = 0 self._zkClient.subscribeStateChanges(self._zkStateChangeListener) # set the state? self._zkStateChangeListener.handleStateChanged(KazooState.CONNECTED) self._zkStateChangeListener.handleNewSession() # while retryCount < self.RETRY_LIMIT: # try: # # kazoo.client.start() will wait, should we call start here? 
## self._zkClient.waitUntilConnected(self._sessionTimeout, TimeUnit.MILLISECONDS) ## self._zkStateChangeListener.handleStateChanged(KeeperState.SyncConnected) # self._zkStateChangeListener.handleStateChanged(KazooState.CONNECTED) # self._zkStateChangeListener.handleNewSession() # break # except HelixException, e: # self.logger.error("fail to createClient."+ str(e)) # raise e # # except Exception, e: # retryCount+=1 # self.logger.error("fail to createClient. retry " + str(retryCount)+ str(e)) # if retryCount == self.RETRY_LIMIT: # raise e def createCallBackHandler(self, path, listener, eventTypes, changeType): """ Returns CallbackHandler Parameters: path: Stringlistener: ObjecteventTypes: EventType[]changeType: ChangeType Java modifiers: private """ if listener == None: raise HelixException("Listener cannot be null") return CallbackHandler(self, self._zkClient, path, listener, eventTypes, changeType) def handleNewSession(self): """ Returns void Java modifiers: protected """ # boolean isConnected = self._zkClient.waitUntilConnected( self.CONNECTIONTIMEOUT, TimeUnit.MILLISECONDS) while not isConnected: self.logger.error("Could NOT connect to zk server in " + str(self.CONNECTIONTIMEOUT) + "ms. 
zkServer: " + str(self._zkConnectString) + ", expiredSessionId: " + str(self._sessionId) + ", clusterName: " + str(self._clusterName)) isConnected = self._zkClient.waitUntilConnected( self.CONNECTIONTIMEOUT, TimeUnit.MILLISECONDS) # ZkConnection # zkConnection = ((ZkConnection) self._zkClient.getConnection()) # synchronized (self) # self._sessionId = Long.toHexString(zkConnection.getZookeeper().getSessionId()) self._sessionId = self._zkClient.getSessionId() # self._accessor.reset() self._baseDataAccessor.reset() self.resetHandlers() self.logger.info("Handling new session, session id:" + str(self._sessionId) + ", instance:" + self._instanceName + ", instanceTye: " + InstanceType.toString(self._instanceType) + ", cluster: " + self._clusterName) # self.logger.info(zkConnection.getZookeeper()) #TODO: enable this # if not ZKUtil.isClusterSetup(self._clusterName, self._zkClient): # raise HelixException("Initial cluster structure is not set up for cluster:" + self._clusterName) if not self.isInstanceSetup(): raise HelixException( "Initial cluster structure is not set up for instance:" + self._instanceName + " instanceType:" + InstanceType.toString(self._instanceType)) if self._instanceType == InstanceType.PARTICIPANT or self._instanceType == InstanceType.CONTROLLER_PARTICIPANT: self.handleNewSessionAsParticipant() if self._instanceType == InstanceType.CONTROLLER or self._instanceType == InstanceType.CONTROLLER_PARTICIPANT: self.addControllerMessageListener( self._messagingService.getExecutor()) # MessageHandlerFactory defaultControllerMsgHandlerFactory = DefaultControllerMessageHandlerFactory( ) self._messagingService.getExecutor().registerMessageHandlerFactory( defaultControllerMsgHandlerFactory.getMessageType(), defaultControllerMsgHandlerFactory) # TODO: skip MessageHandlerFactory for now # MessageHandlerFactory # defaultSchedulerMsgHandlerFactory = DefaultSchedulerMessageHandlerFactory(self) # 
self._messagingService.getExecutor().registerMessageHandlerFactory(defaultSchedulerMsgHandlerFactory.getMessageType(), defaultSchedulerMsgHandlerFactory) # MessageHandlerFactory defaultParticipantErrorMessageHandlerFactory = DefaultParticipantErrorMessageHandlerFactory( self) self._messagingService.getExecutor().registerMessageHandlerFactory( defaultParticipantErrorMessageHandlerFactory.getMessageType(), defaultParticipantErrorMessageHandlerFactory) # TODO: enable this # if self._leaderElectionHandler == None: # # String # path = PropertyPathConfig.getPath(PropertyType.CONTROLLER, self._clusterName) # self._leaderElectionHandler = self.createCallBackHandler(path, DistClusterControllerElection(self._zkConnectString), [ZKEventType.NodeChildrenChanged, ZKEventType.NodeDeleted, ZKEventType.NodeCreated ], ChangeType.CONTROLLER) # else: # self._leaderElectionHandler.init() if self._instanceType == InstanceType.PARTICIPANT or self._instanceType == InstanceType.CONTROLLER_PARTICIPANT or ( self._instanceType == InstanceType.CONTROLLER and self.isLeader()): self.initHandlers() def handleNewSessionAsParticipant(self): """ Returns void Java modifiers: private """ # Builder keyBuilder = self._helixAccessor.keyBuilder() if self._helixAccessor.getProperty( keyBuilder.liveInstance(self._instanceName)) != None: self.logger.warn( "Found another instance with same instanceName: " + self._instanceName + " in cluster " + self._clusterName) try: time.sleep((self._sessionTimeout + 5000) / 1000.0) except Exception, e: self.logger.warn( "Sleep interrupted while waiting for previous liveinstance to go away.", e) if self._helixAccessor.getProperty( keyBuilder.liveInstance(self._instanceName)) != None: # String errorMessage = "instance " + self._instanceName + " already has a liveinstance in cluster " + self._clusterName self.logger.error(errorMessage) raise HelixException(errorMessage) for callback in self._preConnectCallbacks: callback.onPreConnect() self.addLiveInstance() 
self.carryOverPreviousCurrentState() self._messagingService.registerMessageHandlerFactory( MessageType.toString(MessageType.STATE_TRANSITION), self._stateMachEngine) self.addMessageListener(self._messagingService.getExecutor(), self._instanceName)
def createHandler(self, message, context):
    """
    Create the handler for a STATE_TRANSITION message.

    Looks up the state model factory and state model definition named by
    the message, creates the state model for the message's partition if it
    does not exist yet (initialising it to the definition's initial state),
    and builds a CurrentState delta describing that partition.

    Returns MessageHandler, or None when the message has no stateModelDef
        or no matching state model factory is registered
    Parameters:
        message: Message
        context: NotificationContext
    @Override
    Throws:
        HelixException: if the message is not a STATE_TRANSITION message,
            or the state model definition cannot be read from the accessor
    """
    msgType = message.getMsgType()
    if msgType != MessageType.toString(MessageType.STATE_TRANSITION):
        raise HelixException("Unexpected msg type for message "
                             + message.getMsgId() + " type:" + msgType)

    partitionKey = message.getPartitionName()
    stateModelName = message.getStateModelDef()
    resourceName = message.getResourceName()
    sessionId = message.getTgtSessionId()
    bucketSize = message.getBucketSize()

    if stateModelName is None:
        self.logger.error("message does not contain stateModelDef")
        return None

    factoryName = message.getStateModelFactoryName()
    if factoryName is None:
        factoryName = HelixConstants.DEFAULT_STATE_MODEL_FACTORY

    stateModelFactory = self.getStateModelFactory(stateModelName,
                                                  factoryName)
    if stateModelFactory is None:
        self.logger.warn("Cannot find stateModelFactory for model:"
                         + stateModelName + " using factoryName:"
                         + factoryName + " for resourceGroup:"
                         + resourceName)
        return None

    # Fetch the state model definition through the accessor only the first
    # time it is needed; later messages reuse the stored entry.
    if stateModelName not in self._stateModelDefs:
        accessor = self._manager.getHelixDataAccessor()
        keyBuilder = accessor.keyBuilder()
        stateModelDef = accessor.getProperty(
            keyBuilder.stateModelDef(stateModelName))
        if stateModelDef is None:
            raise HelixException("stateModelDef for " + stateModelName
                                 + " does NOT exists")
        self._stateModelDefs[stateModelName] = stateModelDef

    initState = self._stateModelDefs.get(stateModelName).getInitialState()

    stateModel = stateModelFactory.getStateModel(partitionKey)
    if stateModel is None:
        # First message for this partition: create the model and start it
        # in the definition's initial state.
        stateModelFactory.createAndAddStateModel(partitionKey)
        stateModel = stateModelFactory.getStateModel(partitionKey)
        stateModel.updateState(initState)

    # Fall back to the initial state when the model reports no state.
    partitionState = stateModel.getCurrentState()
    if partitionState is None:
        partitionState = initState

    currentStateDelta = CurrentState(resourceName)
    currentStateDelta.setSessionId(sessionId)
    currentStateDelta.setStateModelDefRef(stateModelName)
    currentStateDelta.setStateModelFactoryName(factoryName)
    currentStateDelta.setBucketSize(bucketSize)
    currentStateDelta.setState(partitionKey, partitionState)

    # HelixTaskExecutor
    executor = context.get(NotificationContext.TASK_EXECUTOR_KEY)
    return HelixStateTransitionHandler(stateModel, message, context,
                                       currentStateDelta, executor)
def onMessage(self, instanceName, messages, changeContext): """ Returns void Parameters: instanceName: Stringmessages: List<Message>changeContext: NotificationContext @Override """ if changeContext.getType() == NotificationContext.Type.FINALIZE: self.LOG.info("Get FINALIZE notification") for factory in self._handlerFactoryMap.values(): factory.reset() for f in self._taskMap.values():# Future<HelixTaskResult> f.cancel(True) self._taskMap.clear() return # HelixManager manager = changeContext.getManager() # HelixDataAccessor accessor = manager.getHelixDataAccessor() # Builder keyBuilder = accessor.keyBuilder() if messages == None or len(messages) == 0: self.LOG.info("No Messages to process") return # Collections.sort(messages, Message.CREATE_TIME_COMPARATOR) messages.sort(Message.CREATE_TIME_COMPARATOR) # List<MessageHandler> handlers = [] # handlers = ArrayList<MessageHandler>() # List<Message> # readMsgs = ArrayList<Message>() readMsgs = [] # String sessionId = manager.getSessionId() # List<String> curResourceNames = accessor.getChildNames(keyBuilder.currentStates(instanceName, sessionId)) # List<PropertyKey> createCurStateKeys = [] # List<CurrentState> metaCurStates = [] # Set<String> createCurStateNames = set() changeContext.add(NotificationContext.TASK_EXECUTOR_KEY, self) for message in messages: if message.getMsgType().upper() == MessageType.toString(MessageType.NO_OP).upper(): self.LOG.info("Dropping NO-OP message. mid: " + message.getId() + ", from: " + message.getMsgSrc()) accessor.removeProperty(message.getKey(keyBuilder, instanceName)) continue # String tgtSessionId = message.getTgtSessionId() if not (sessionId == tgtSessionId) and not (tgtSessionId == "*"): # String warningMessage = "SessionId does NOT match. 
expected sessionId: " + sessionId + ", tgtSessionId in message: " + tgtSessionId + ", messageId: " + message.getMsgId() self.LOG.warn(warningMessage) accessor.removeProperty(message.getKey(keyBuilder, instanceName)) self._statusUpdateUtil.logWarning(message, HelixStateMachineEngine, warningMessage, accessor) continue if MessageState.NEW != message.getMsgState(): self.LOG.trace("Message already read. mid: " + message.getMsgId()) continue try: # List<MessageHandler> createHandlers = self.createMessageHandlers(message, changeContext) # if createHandlers.isEmpty(): if not createHandlers: continue handlers.extend(createHandlers) except KeyboardInterrupt, e: #TODO: enable this? # except Exception, e: # String error = "Failed to create message handler for " + message.getMsgId() + ", exception: " + str(e) self._statusUpdateUtil.logError(message, HelixStateMachineEngine, e, error, accessor) message.setMsgState(MessageState.UNPROCESSABLE) accessor.removeProperty(message.getKey(keyBuilder, instanceName)) # TODO: comment out for now # # ObjectMapper # mapper = ObjectMapper() # # SerializationConfig # serializationConfig = mapper.getSerializationConfig() # serializationConfig.set(SerializationConfig.Feature.INDENT_OUTPUT, True) # # StringWriter # sw = StringWriter() # try: # mapper.writeValue(sw, message.getRecord()) # self.LOG.error("Message cannot be processed:" + str(sw.toString())+ str(e)) # except Exception, ex: # self.LOG.error(""+ str(ex)) continue message.setMsgState(MessageState.READ) message.setReadTimeStamp(time.time()) message.setExecuteSessionId(changeContext.getManager().getSessionId()) self._statusUpdateUtil.logInfo(message, HelixStateMachineEngine, "New Message", accessor) readMsgs.append(message) if not message.isControlerMsg() and (message.getMsgType() == MessageType.toString(MessageType.STATE_TRANSITION)): # String resourceName = message.getResourceName() if not curResourceNames.__contains__(resourceName) and not 
createCurStateNames.__contains__(resourceName): createCurStateNames.add(resourceName) createCurStateKeys.append(keyBuilder.currentState(instanceName, sessionId, resourceName)) # CurrentState metaCurState = CurrentState(resourceName) metaCurState.setBucketSize(message.getBucketSize()) metaCurState.setStateModelDefRef(message.getStateModelDef()) metaCurState.setSessionId(sessionId) metaCurState.setGroupMessageMode(message.getGroupMessageMode()) # String ftyName = message.getStateModelFactoryName() if ftyName != None: metaCurState.setStateModelFactoryName(ftyName) else: metaCurState.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY) metaCurStates.append(metaCurState)