def stop(s, p):
    '''
    Ask the module-level boot event listener to stop.

    Takes the two positional arguments of a signal handler; neither is used.
    Any failure while stopping is logged and not propagated.

    @s - first handler argument (unused)
    @p - second handler argument (unused)
    '''
    global listener

    try:
        logger.info('Boot event listener stoping...')
        listener.stop()
    except Exception as err:
        failure = 'Stoping boot event listener error: %s' % err
        logger.error(failure)
def run(self):
    '''
    Worker loop: take operations from self.queue, pass each one to
    PluginManager.process and then report it through
    self.dbus_client.operationProcessedEvent.

    The loop ends when FINISH_FLAG is read from the queue. A processing
    failure is logged; any other failure inside an iteration is logged as
    well and the loop keeps running.
    '''
    thread_name = self.getName()
    logger.info('%s started!' % thread_name)

    while True:
        try:
            operation = self.queue.get()
            if operation == FINISH_FLAG:
                logger.info('%s stoped!' % thread_name)
                break

            try:
                PluginManager.process(operation)
            except Exception as err:
                details = (operation.operation_name, err, operation.session_id,
                           operation.node, operation.parameters)
                logger.error('Processing operation %s failed! Details: %s.'
                             ' \nInput parameters: session_id=%s, node=%s, parameters=%s' % details)

            # NOTE(review): the notification is assumed to be sent for failed
            # operations too - confirm against the original layout.
            op_args = Dictionary(signature='ss')
            op_args.update(operation.parameters)
            self.dbus_client.operationProcessedEvent(
                str(operation.session_id),
                str(operation.node),
                str(operation.operation_name),
                op_args)
        except Exception as err:
            logger.error('%s failed: %s' % (thread_name, err))
def run(self): """Thread class run method. Call asynchronous cluster operation""" fri_client = FriCaller() logger.info('%s started!'%self.getName()) while True: try: item = self.queue.get() if item == STOP_THREAD_EVENT: logger.info('%s stopped!'%self.getName()) break #unpack item node, packet = item err_code, err_message = fri_client.call(node, packet) if err_code != RC_OK: logger.error('%s: error while sending %s to node %s. Details: %s' % (self.getName(), packet, node, err_message)) else: logger.debug('%s: node %s is called successful' % (self.getName(), node)) except Exception, err: err_message = '%s failed: %s'%(self.getName(), err) logger.error(err_message) finally:
def updateOperationProgress(self, progress_percent, ret_message='', ret_code=0, ret_params=None):
    '''
    Updating operation progress.
    Send progress information to management node by FRI protocol

    DON'T REIMPLEMENT THIS METHOD

    @progress_percent (integer) - operation progress in percents (100 = finished operation)
    @ret_message (string) - description of operation progress
    @ret_code (integer) - indicate state of operation processing
                (if 0 then already ok, else operation is failed)
    @ret_params (dict {param_name: param_value}) - return parameters;
                None (the default) is sent as an empty dict

    A non-zero code returned by the FRI call is logged; nothing is returned
    to the caller.
    '''
    # A fresh dict per call instead of one dict object shared by all calls.
    if ret_params is None:
        ret_params = {}

    fri_caller = FriCaller()

    packet = {
        'id': self.__session_id,
        'node': self.__node,
        'progress': progress_percent,
        'ret_code': ret_code,
        'ret_message': ret_message,
        'ret_parameters': ret_params}

    code, message = fri_caller.call(FARNSWORTH_HOSTNAME, packet, NODES_MANAGER_PORT, timeout=WAIT_TIMEOUT)

    if code:
        logger.error('%s.updateOperationProgress failed with code=%s and message=%s' %
                     (self.__class__.__name__, code, message))
def onAsyncOperationResult(self, session_id, node, progress, ret_code, ret_message, ret_params_map):
    '''
    Reimplemented FriClient class method for performing asynchronous operation results

    @session_id (string) identifier of session (operation instance id)
    @node (string) node hostname
    @progress (integer) operation progress in percents (100 for end of operation)
    @ret_code (integer) code of result
    @ret_message (string) result description
    @ret_params_map (dict {<param_name>:<param_value>}) return parameters

    The node's return parameters are merged into the stored operation, the
    progress is persisted and, when the operation is reported as completed,
    it is finished. Errors are logged and never raised to the caller.
    '''
    try:
        operation = self._active_operations.get(session_id, None)
        if operation is None:
            # session_id is the first value: the message has five placeholders.
            logger.error('onAsyncOperationResult failed: Operation instance with ID=%s is not found.'
                         ' Received parameters: node=%s, ret_code=%s, ret_message=%s, ret_params_map=%s' %
                         (session_id, node, ret_code, ret_message, ret_params_map))
            return

        if node in operation.ret_params_map:
            operation.ret_params_map[node].update(ret_params_map)
        else:
            operation.ret_params_map[node] = ret_params_map

        is_completed = self._update_operation_progress(session_id, node, progress, ret_code, ret_message)

        if is_completed:
            self._finish_operation(session_id, operation)
    except Exception as err:
        # Still best-effort (nothing is re-raised), but no longer silent.
        logger.error('onAsyncOperationResult failed: %s' % err)
def run(self): logger.info('%s started!'%self.getName()) fri_caller = FriCaller() packet = {'id':0, 'node': '', 'operation': 'LIVE'} while True: try: item = self.queue.get() if item == FINISH_FLAG: logger.info('%s stoped!'%self.getName()) break hostname, last_state = item ret_code, ret_message = fri_caller.call(hostname, packet, timeout=self.wait_timeout) if ret_code: logger.debug('Node with hostname %s is not live!!!'%hostname) else: logger.debug('Node with hostname %s is live!'%hostname) if ret_code and (last_state == CNS_ON): self.__change_node_state(hostname, CNS_OFF) elif (not ret_code) and (last_state == CNS_OFF): self.__change_node_state(hostname, CNS_ON) except Exception, err: logger.error('%s failed: %s'%(self.getName(), err)) finally:
def callOperationOnCluster(self, user_name, cluster_name, operation_name, parameters):
    '''
    Call operation in cluster:
        - run the operation's beforeCall plugins; when they return a non-zero
          ret_code, return (0, ret_code, ret_message) without calling anything
        - otherwise call the operation and return its result

    @return (session_id, ret_code, ret_message); on an exception the result
            is (0, 21, <exception text>)
    '''
    try:
        parameters = self.__cast_parameters(parameters)
        user_name, cluster_name, operation_name = \
            str(user_name), str(cluster_name), str(operation_name)

        target = CallObject(CallObject.CLUSTER, cluster_name)

        plugin_code, plugin_message = self.pluginManager.processBeforeCallPlugins(
            operation_name, target, parameters)
        if not plugin_code:
            return self.__call_operation(user_name, operation_name, target, parameters)

        return 0, plugin_code, plugin_message
    except Exception as err:
        failure = str(err)
        logger.error('callOperationOnCluster error: %s' % failure)
        self.__debug_error()
        return 0, 21, failure
def callOperationOnNodes(self, user_name, nodes_list, operation_name, parameters):
    '''
    Call operation on nodes list (hostnames):
        - run the operation's beforeCall plugins; when they return a non-zero
          ret_code, return (0, ret_code, ret_message) without calling anything
        - otherwise call the operation and return its result

    @return (session_id, ret_code, ret_message); on an exception the result
            is (0, 21, <exception text>)
    '''
    try:
        parameters = self.__cast_parameters(parameters)
        user_name, operation_name = str(user_name), str(operation_name)
        hostnames = [str(node) for node in nodes_list]

        target = CallObject(CallObject.NODES, hostnames)

        plugin_code, plugin_message = self.pluginManager.processBeforeCallPlugins(
            operation_name, target, parameters)
        if not plugin_code:
            return self.__call_operation(user_name, operation_name, target, parameters)

        return 0, plugin_code, plugin_message
    except Exception as err:
        failure = str(err)
        logger.error('callOperationOnNodes error: %s' % failure)
        self.__debug_error()
        return 0, 21, failure
def operationCall(self, session_id, nodes_list, operation_name, parameters_map):
    '''
    Queue an asynchronous operation call for every node in the list.

    One FRI packet is formed for the operation; each node gets its own copy
    with the 'node' key set, put on the async packets queue as
    (node, packet).

    @session_id (string) identifier of session (operation instance id)
    @nodes_list (list of string) list of nodes hostnames (or IP addresses)
    @operation_name (string) name of operation
    @parameters_map (dict {<param_name>:<param_value>}) operation parameters

    @return ret_code (integer) code of result
    @return ret_message (string) result description
    '''
    try:
        logger.debug('FriClient: [%s] calling %s operation at nodes %s' %
                     (session_id, operation_name, nodes_list))

        base_packet = self.__form_fri_packet(session_id, operation_name, parameters_map)
        logger.debug('FriClient: [%s] FRI packet: %s' % (session_id, base_packet))

        for hostname in nodes_list:
            node_packet = base_packet.copy()
            node_packet['node'] = hostname
            self.__async_packets.put((hostname, node_packet))

        return (RC_OK, '')
    except Exception as err:
        failure = 'FriClient failed on operation call: %s' % err
        logger.error(failure)
        return (RC_ERROR, failure)
def __delete_session(self, session_id):
    '''
    Delete the progress rows and then the instance row of an operation
    session. Database errors are logged and not raised.

    @session_id identifier of the operation instance (NM_OPERATION_INSTANCE.id)
    '''
    self._dbconn.start_transaction()
    # NOTE(review): no matching commit/rollback call is visible in this
    # method - confirm where the transaction is finished.

    delete_statements = (
        "DELETE FROM NM_OPERATION_PROGRESS WHERE instance_id=%s",
        "DELETE FROM NM_OPERATION_INSTANCE WHERE id=%s",
    )

    try:
        for statement in delete_statements:
            self._dbconn.modify(statement, (session_id,))
    except Exception as err:
        logger.error('OperationsEngine.__delete_session: %s' % err)
def _finish_operation(self, session_id, operation):
    '''
    Mark an operation instance as complete and notify its callback.

    Sets status ORS_COMPLETE and the end time in NM_OPERATION_INSTANCE,
    removes the session from the active operations and calls
    operation.callbackFunction(operation_name, session_id, ORS_COMPLETE,
    ret_params_map).

    @session_id identifier of the operation instance
    @operation stored operation object (provides operation_name,
               ret_params_map and callbackFunction)

    Any exception is logged and re-raised unchanged.
    '''
    try:
        self._dbconn.modify("UPDATE NM_OPERATION_INSTANCE SET status=%s, end_datetime=%s WHERE id=%s",
                            (ORS_COMPLETE, datetime.now(), session_id))

        self._active_operations.delete(session_id)

        operation.callbackFunction(operation.operation_name, session_id, ORS_COMPLETE,
                                   operation.ret_params_map)
    except Exception as err:
        logger.error('WrappedFriClient._finish_operation: %s' % err)
        # Bare raise keeps the original traceback ("raise err" would reset it).
        raise
def callOperationOnNodes(self, user_name, nodes_list, operation_name, parameters_map, onOperationResultRoutine):
    '''
    call operation on the given nodes

    @user_name (string) name of user who calling operation (nm_user.name)
    @nodes_list (list of strings) hostnames of nodes (nm_node.hostname)
    @operation_name (string) name of operation (nm_operation.name)
    @parameters_map (dict {param_name, param_value}) operation input parameters
    @onOperationResultRoutine (func) callback routine for receiving operation results
        This routine should has following spec: (operation_name, session_id, status, ret_params_map),
        where
            operation_name (string) - name of operation
            session_id (integer) - identifier of session
            status (integer) - status of operation
            ret_params_map (dict) - {<node_hostname>: { <param_name> : <param_value>, ...}, ...}

    Only nodes found in NM_NODE with current_state NCS_UP are used.

    @return (session_id, ret_code, ret_message); ret_code is 0 on success
            and 1 on failure. On failure a session that was already created
            is removed from the active operations and from the database.
    '''
    # Set before the try block so the error handler can always read it.
    session_id = 0
    try:
        logger.info('CALL operation %s on nodes %s' % (operation_name, nodes_list))

        operation_id, node_type_id, timeout = self.__get_operation_info(operation_name)

        #select all ACTIVE nodes
        nodes_rows = []
        for node in nodes_list:
            rows = self._dbconn.select("SELECT id, hostname, node_type FROM NM_NODE "
                                       "WHERE hostname = %s AND current_state=%s", (node, NCS_UP))
            if rows:
                nodes_rows.append(rows[0])

        if not nodes_rows:
            raise Exception('No active nodes found for execution this operation!')

        nodes = self.__form_nodes(nodes_rows, node_type_id)

        if not nodes:
            raise Exception('Not found nodes for operation %s.' % (operation_name,))

        session_id = self.__insert_operation_into_db(operation_id, user_name, nodes)

        self._active_operations.put(session_id,
                                    OperationResult(operation_name, timeout, onOperationResultRoutine))

        self.__call_operation(session_id, nodes, operation_name, parameters_map)

        return (session_id, 0, 'Operation %s is called on nodes' % (operation_name,))
    except Exception as err:
        # The log prefix now carries the real method name.
        logger.error('callOperationOnNodes error: %s' % err)

        if session_id:
            self._active_operations.delete(session_id)
            self.__delete_session(session_id)

        return (session_id, 1, str(err))
def _update_pending_operations(self):
    '''
    Mark every operation still recorded as in progress as timed out.

    Over its own DatabaseConnection:
        - NM_OPERATION_INSTANCE rows with status ORS_INPROGRESS get status
          ORS_TIMEOUTED and the current time as end_datetime
        - NM_OPERATION_PROGRESS rows that are not at 100% and have ret_code
          0 or NULL get ret_code 12 and a timeout message

    Errors are logged and not raised.
    '''
    try:
        dbconn = DatabaseConnection()

        dbconn.modify("UPDATE NM_OPERATION_INSTANCE SET status=%s, end_datetime=%s WHERE status=%s",
                      (ORS_TIMEOUTED, datetime.now(), ORS_INPROGRESS,))

        # "ret_code=NULL" is never true in SQL; NULL has to be tested with IS NULL.
        dbconn.modify("UPDATE NM_OPERATION_PROGRESS SET ret_code=%s, ret_message=%s "
                      "WHERE progress<>100 AND (ret_code=0 OR ret_code IS NULL)",
                      (12, 'Operation is timeouted!'))

        del dbconn
    except Exception as err:
        logger.error('CheckOpTimeoutsThread._update_pending_operations: %s' % err)
def process(operation_obj):
    '''
    Run the processor registered for an operation.

    The processor is looked up in PluginManager.operations_map by
    operation_obj.operation_name, instantiated with the session id and node,
    and its process() is called with the operation parameters.

    A missing processor or any processing failure is logged, not raised.

    @operation_obj object with operation_name, session_id, node and
                   parameters attributes
    '''
    try:
        processor_class = PluginManager.operations_map.get(operation_obj.operation_name, None)
        if not processor_class:
            raise Exception('Processor is not found for %s operation' % operation_obj.operation_name)

        worker = processor_class(operation_obj.session_id, operation_obj.node)
        worker.process(operation_obj.parameters)
    except Exception as err:
        details = (operation_obj.operation_name, err, operation_obj.session_id,
                   operation_obj.node, operation_obj.parameters)
        logger.error('Processing operation %s failed! Details: %s.'
                     ' \nInput parameters: session_id=%s, node=%s, parameters=%s' % details)
def _finish_operation(self, session_id, operation):
    '''
    Close an operation instance as timed out and notify its callback.

    - NM_OPERATION_INSTANCE: status ORS_TIMEOUTED and end time are set
    - NM_OPERATION_PROGRESS: rows of this instance that are not at 100% and
      have ret_code 0 or NULL get ret_code 12 and a timeout message
    - the session is removed from the active operations
    - operation.callbackFunction(operation_name, session_id, ORS_TIMEOUTED,
      ret_params_map) is called

    @session_id identifier of the operation instance
    @operation stored operation object (provides operation_name,
               ret_params_map and callbackFunction)

    Errors are logged and not raised.
    '''
    try:
        self._dbconn.modify("UPDATE NM_OPERATION_INSTANCE SET status=%s, end_datetime=%s WHERE id=%s",
                            (ORS_TIMEOUTED, datetime.now(), session_id))

        # "ret_code=NULL" is never true in SQL; NULL has to be tested with IS NULL.
        self._dbconn.modify("UPDATE NM_OPERATION_PROGRESS SET ret_code=%s, ret_message=%s "
                            "WHERE instance_id=%s AND progress<>100 AND (ret_code=0 OR ret_code IS NULL)",
                            (12, 'Operation is timeouted!', session_id))

        self._active_operations.delete(session_id)

        operation.callbackFunction(operation.operation_name, session_id, ORS_TIMEOUTED,
                                   operation.ret_params_map)
    except Exception as err:
        logger.error('CheckOpTimeoutsThread._finish_operation: %s' % err)
def start(self):
    '''
    Bind the server socket and put every accepted connection on self.queue
    until self.stopped becomes true.

    A connection accepted after the stop flag was set is closed and the loop
    ends. Errors are logged and the loop condition is checked again.
    '''
    self.__bind_socket()

    while not self.stopped:
        try:
            connection, _peer = self.sock.accept()

            if not self.stopped:
                self.queue.put(connection)
            else:
                connection.close()
                break
        except Exception as err:
            failure = 'FriServer failed: %s' % err
            logger.error(failure)
def callOperationOnCluster(self, user_name, cluster_name, operation_name, parameters_map, onOperationResultRoutine):
    '''
    call operation on all nodes in cluster

    @user_name (string) name of user who calling operation (nm_user.name)
    @cluster_name (string) name of cluster (nm_cluster.cluster_sid)
    @operation_name (string) name of operation (nm_operation.name)
    @parameters_map (dict {param_name, param_value}) operation input parameters
    @onOperationResultRoutine (func) callback routine for receiving operation results
        This routine should has following spec: (operation_name, session_id, status, ret_params_map),
        where
            operation_name (string) - name of operation
            session_id (integer) - identifier of session
            status (integer) - status of operation
            ret_params_map (dict) - {<node_hostname>: { <param_name> : <param_value>, ...}, ...}

    Nodes are selected by cluster, with admin_status other than
    NAS_NOTACTIVE and current_state NCS_UP.

    @return (session_id, ret_code, ret_message); ret_code is 0 on success
            and 1 on failure. On failure a session that was already created
            is removed from the active operations and from the database.
    '''
    try:
        session_id = 0

        logger.info('CALL operation %s on cluster %s' % (operation_name, cluster_name))

        operation_id, node_type_id, timeout = self.__get_operation_info(operation_name)

        #select all ACTIVE nodes in cluster
        active_rows = self._dbconn.select(
            "SELECT N.id, N.hostname, N.node_type FROM NM_NODE N, NM_CLUSTER C "
            " WHERE C.id = N.cluster_id AND C.cluster_sid=%s AND N.admin_status<>%s"
            " AND N.current_state=%s",
            (cluster_name, NAS_NOTACTIVE, NCS_UP))
        if not active_rows:
            raise Exception('No active nodes found for cluster with name %s' % cluster_name)

        nodes = self.__form_nodes(active_rows, node_type_id)
        if not nodes:
            raise Exception('Not found nodes for operation %s in cluster %s.' %
                            (operation_name, cluster_name))

        session_id = self.__insert_operation_into_db(operation_id, user_name, nodes)

        pending = OperationResult(operation_name, timeout, onOperationResultRoutine)
        self._active_operations.put(session_id, pending)

        self.__call_operation(session_id, nodes, operation_name, parameters_map)

        return (session_id, 0,
                'Operation %s is called on cluster %s' % (operation_name, cluster_name))
    except Exception as err:
        logger.error('callOperationOnCluster error: %s' % err)

        # Undo a session that was registered before the failure.
        if session_id:
            self._active_operations.delete(session_id)
            self.__delete_session(session_id)

        return (session_id, 1, str(err))
def run(self):
    '''
    Thread loop that finishes timed-out operations.

    Calls _update_pending_operations() once, then, while self._is_started is
    true, asks the active operations for a timed-out one: if there is one it
    is finished, otherwise the thread sleeps for a second. Errors are logged
    and the loop continues.
    '''
    logger.debug('%s started!' % self.getName())

    self._update_pending_operations()

    while self._is_started:
        try:
            session_id, operation = self._active_operations.get_timeouted_operation()

            if session_id:
                self._finish_operation(session_id, operation)
            else:
                # nothing has timed out yet
                time.sleep(1)
        except Exception as err:
            logger.error('%s failed: %s' % (self.getName(), err))
def getOperationStatus(self, session_id):
    '''
    get status of operation instance

    @session_id (integer) identifier of operation instance (nm_operation_instance.id)

    @return status of operation instance, or None when the instance is not
            found or the query fails (the reason is logged)
    '''
    try:
        rows = self._dbconn.select("SELECT status FROM NM_OPERATION_INSTANCE WHERE id=%s",
                                   (session_id,))
        if rows:
            return rows[0][0]

        raise Exception('Operation instance with ID=%s is not found in database!' % session_id)
    except Exception as err:
        logger.error('getOperationStatus: %s' % err)

    return None
def run(self): logger.info('NodesMonitor started!') while not self.__stoped: try: t0_point = datetime.now() rows = self.__dbconn.select("SELECT N.hostname, N.current_state FROM nm_node N, nm_cluster C \ WHERE N.cluster_id=C.id AND N.admin_status=%s AND C.status=%s", (ANS_ACTIVE, CS_ACTIVE)) logger.debug('NodesMonitor: Selected %i nodes for checking state'%len(rows)) for row in rows: self.__nodes_queue.put((row[0], row[1])) self.__nodes_queue.join() except Exception, err: logger.error('NodesMonitor failed: %s' % err) finally:
def _update_operation_progress(self, session_id, node, progress, ret_code, ret_message):
    '''
    update operation progress in database (NM_OPERATION_PROGRESS table)

    @session_id identifier of the operation instance
    @node (string) hostname of the node that reported the progress
    @progress (integer) operation progress in percents
    @ret_code (integer) code of result
    @ret_message (string) result description

    @return True if operation is completed or False if operation in progress.
            The operation counts as completed when no row of the instance is
            below 100% with ret_code 0 or NULL.

    Any exception is logged and re-raised unchanged.
    '''
    try:
        self._dbconn.modify("UPDATE NM_OPERATION_PROGRESS SET progress=%s, ret_code=%s, ret_message=%s "
                            "WHERE instance_id=%s AND node_id=(SELECT id FROM NM_NODE WHERE hostname=%s)",
                            (progress, ret_code, ret_message, session_id, node))

        # "ret_code=NULL" is never true in SQL, so rows without a result code
        # were never counted as uncompleted; NULL must be tested with IS NULL.
        uncompleted_count = self._dbconn.select(
            "SELECT count(id) FROM NM_OPERATION_PROGRESS "
            "WHERE instance_id=%s AND progress<>100 AND (ret_code=0 OR ret_code IS NULL)",
            (session_id,))

        return uncompleted_count[0][0] == 0
    except Exception as err:
        logger.error('WrappedFriClient._update_operation_progress: %s' % err)
        # Bare raise keeps the original traceback ("raise err" would reset it).
        raise
def run(self): logger.info('%s started!'%self.getName()) while True: session_id = None node = None ret_params_map = {} ret_code = RC_OK ret_message = '' try: sock = self.queue.get() if sock == STOP_THREAD_EVENT: logger.info('%s stopped!'%self.getName()) break data = '' while True: received = sock.recv(BUF_SIZE) if not received: break data += received if len(received) < BUF_SIZE: break logger.debug('%s receive: %s'%(self.getName(),data)) if not data: raise Exception('empty data block') json_object = json.loads(data) self.__onProcessResult(json_object) except Exception, err: ret_message = '%s error: %s' % (self.getName(), err) ret_code = RC_ERROR logger.error(ret_message) finally:
def run(self):
    '''
    Collect this node's data and send it as a boot event to the management
    server, retrying until the call returns code 0 or the sender is stopped.

    The packet carries uuid, hostname, ip/mac address, login, password,
    processor and memory. Between failed attempts the thread sleeps for
    SLEEP_SENDER_TIME.

    @return (integer) code of the last send attempt (0 on success); None
            when no attempt was made or an exception was caught (the
            exception is logged)
    '''
    try:
        self.stoped = False

        self._remount_devfs()#FIXME

        node_uuid = self._get_uuid()
        hostname = self._set_new_hostname(node_uuid)
        mac_address, ip_address = self._get_interface_info()
        login, password = self._create_user()
        processor, memory = self._get_hardware_info()

        caller = friBase.FriCaller()
        packet = {
            'uuid': node_uuid,
            'hostname': hostname,
            'ip_address': ip_address,
            'mac_address': mac_address,
            'login': login,
            'password': password,
            'processor': processor,
            'memory': memory
        }

        # Keep the account password out of the log file.
        logger.info('BOOT EVENT: %s' % dict(packet, password='***'))

        # Defined up front so "return code" is valid even if the sender is
        # stopped before the first attempt.
        code = None
        while not self.stoped:
            code, msg = caller.call(MANAGEMENT_SERVER, packet, LISTENER_PORT)
            if code == 0:
                break

            logger.error('Boot event send error: %s' % msg)
            time.sleep(SLEEP_SENDER_TIME)

        return code
    except Exception as err:
        logger.error("Boot event sender error: %s" % err)
if not in_line: #EOF occured break host, facility, priority, level, tag, program, isodate, msg = in_line.split('[-]') host = host.strip() node_id = NODES.get(host, None) if node_id is None: rows = dbconn.select("SELECT id FROM nm_node WHERE hostname=%s",(host,)) if rows: NODES[host] = rows[0][0] node_id = rows[0][0] dbconn.modify("INSERT INTO logs (node_id, host, facility, priority, level, tag, program, log_timestamp, msg) \ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)", (node_id, host, facility.strip(), priority.strip(), level.strip(), tag.strip(), program.strip(), isodate.strip(), msg.strip())) if __name__ == "__main__": try: writer_loop() logger.info('Log writer exiting...') except Exception, ex: logger.error("log writer main loop caught exception: %s" % ex) sys.exit(1)
def __on_operation_results(self, operation_name, session_id, status, ret_params):
    '''
    Pass the results of an operation to the afterCall plugins.

    @operation_name name of operation
    @session_id identifier of session
    @status status of operation
    @ret_params return parameters of the operation

    A plugin failure is logged and not raised.
    '''
    try:
        self.pluginManager.processAfterCallPlugins(operation_name, session_id, status, ret_params)
    except Exception as err:
        failure = 'afterCall operation plugins processing error: %s' % err
        logger.error(failure)
json_object = json.loads(data) self.__onProcessResult(json_object) except Exception, err: ret_message = '%s error: %s' % (self.getName(), err) ret_code = RC_ERROR logger.error(ret_message) finally: self.queue.task_done() try: if sock: sock.send(json.dumps({'ret_code':ret_code, 'ret_message':ret_message})) sock.close() except Exception, err: logger.error('%s sending result error: %s' % (self.getName(), err)) class FriCaller: """class for calling asynchronous operation over FRI protocol""" def call(self, hostname, packet, port=FRI_PORT, timeout=3.0): sock = None try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(timeout) sock.connect((hostname, port)) data = json.dumps(packet)
#--------------------------------------------------------------------------------

if __name__ == '__main__':
    # Script entry point: run the boot event listener until it is stopped.
    try:
        logger.info('Boot event listener starting...')

        listener = BootEventListener()

        def stop(s, p):
            '''SIGINT handler: stop the listener; failures are only logged.'''
            global listener
            try:
                logger.info('Boot event listener stoping...')
                listener.stop()
            except Exception as err:
                logger.error('Stoping boot event listener error: %s' % err)

        signal.signal(signal.SIGINT, stop)

        listener.start()

        logger.info('Boot event listener stoped')
    except Exception as err:
        # Any start-up or run-time failure ends the process with exit code 1.
        logger.error('Boot event listener error: %s. exit!' % err)
        sys.exit(1)