def run(self): """Thread class run method. Call asynchronous cluster operation""" fri_client = FriCaller() logger.info('%s started!'%self.getName()) while True: try: item = self.queue.get() if item == STOP_THREAD_EVENT: logger.info('%s stopped!'%self.getName()) break #unpack item node, packet = item err_code, err_message = fri_client.call(node, packet) if err_code != RC_OK: logger.error('%s: error while sending %s to node %s. Details: %s' % (self.getName(), packet, node, err_message)) else: logger.debug('%s: node %s is called successful' % (self.getName(), node)) except Exception, err: err_message = '%s failed: %s'%(self.getName(), err) logger.error(err_message) finally:
def run(self):
    """Worker loop: process queued operations until FINISH_FLAG is received.

    Each item taken from self.queue is handed to PluginManager.process().
    Afterwards operationProcessedEvent is sent through self.dbus_client with
    the operation's session id, node, operation name and parameters.
    Exceptions are logged and never terminate the loop; only FINISH_FLAG does.
    """
    name = self.getName()
    logger.info('%s started!'%name)
    while True:
        try:
            operation = self.queue.get()
            # FINISH_FLAG is the sentinel that ends this worker
            if operation == FINISH_FLAG:
                logger.info('%s stoped!'%name)
                break

            try:
                PluginManager.process(operation)
            except Exception, err:
                # a failing operation is only reported in the log
                logger.error('Processing operation %s failed! Details: %s.\
                        \nInput parameters: session_id=%s, node=%s, parameters=%s'%
                        (operation.operation_name, err, operation.session_id,
                        operation.node, operation.parameters))

            # NOTE(review): the flattened source does not show whether the
            # notification below belongs to the except handler above or follows
            # it; it is laid out here as following it (sent for every processed
            # operation) - confirm against the original file.
            # presumably a D-Bus dictionary with string keys and values - verify
            op_args = Dictionary(signature='ss')
            op_args.update(operation.parameters)
            self.dbus_client.operationProcessedEvent(str(operation.session_id), str(operation.node),
                            str(operation.operation_name), op_args)
        except Exception, err:
            # anything unexpected (queue, D-Bus call, ...) is logged; loop goes on
            logger.error('%s failed: %s'%(name, err))
def run(self): logger.info('%s started!'%self.getName()) fri_caller = FriCaller() packet = {'id':0, 'node': '', 'operation': 'LIVE'} while True: try: item = self.queue.get() if item == FINISH_FLAG: logger.info('%s stoped!'%self.getName()) break hostname, last_state = item ret_code, ret_message = fri_caller.call(hostname, packet, timeout=self.wait_timeout) if ret_code: logger.debug('Node with hostname %s is not live!!!'%hostname) else: logger.debug('Node with hostname %s is live!'%hostname) if ret_code and (last_state == CNS_ON): self.__change_node_state(hostname, CNS_OFF) elif (not ret_code) and (last_state == CNS_OFF): self.__change_node_state(hostname, CNS_ON) except Exception, err: logger.error('%s failed: %s'%(self.getName(), err)) finally:
def stop(s, p):
    """Stop the module-level boot event listener.

    Both arguments are accepted but not used (the two-argument shape matches
    what a signal handler receives - presumably this is registered as one).
    Any error raised while stopping is logged and swallowed.
    """
    global listener

    try:
        logger.info('Boot event listener stoping...')
        listener.stop()
    except Exception as failure:
        logger.error('Stoping boot event listener error: %s' % (failure,))
def callOperationOnNodes(self, user_name, nodes_list, operation_name, parameters_map, onOperationResultRoutine):
    '''
    call operation on the listed nodes

    Only nodes from @nodes_list that are found in NM_NODE with
    current_state = NCS_UP take part in the call.

    @user_name (string) name of user who is calling operation (nm_user.name)
    @nodes_list (list of strings) hostnames of nodes (nm_node.hostname)
    @operation_name (string) name of operation (nm_operation.name)
    @parameters_map (dict {param_name, param_value}) operation input parameters
    @onOperationResultRoutine (func) callback routine for receiving operation results
        This routine should have the following spec:
            (operation_name, session_id, status, ret_params_map), where
            operation_name (string) - name of operation
            session_id (integer) - identifier of session
            status (integer) - status of operation
            ret_params_map (dict) - {<node_hostname>: { <param_name> : <param_value>, ...}, ...}

    @return (session_id, ret_code, ret_message)
        ret_code is 0 when the operation has been called and 1 on any error;
        on error ret_message is the text of the exception and a session that
        was already created is removed again before returning.
    '''
    # initialised before the try block: the error handler below reads it
    session_id = 0
    try:
        logger.info('CALL operation %s on nodes %s'%(operation_name, nodes_list))

        operation_id, node_type_id, timeout = self.__get_operation_info(operation_name)

        #select all ACTIVE nodes
        nodes_rows = []
        for node in nodes_list:
            rows = self._dbconn.select("SELECT id, hostname, node_type FROM NM_NODE "
                                       "WHERE hostname = %s AND current_state=%s", (node, NCS_UP))
            if rows:
                nodes_rows.append(rows[0])

        if not nodes_rows:
            raise Exception('No active nodes found for execution this operation!')

        nodes = self.__form_nodes(nodes_rows, node_type_id)
        if not nodes:
            raise Exception('Not found nodes for operation %s.'%(operation_name,))

        session_id = self.__insert_operation_into_db(operation_id, user_name, nodes)

        # register the pending result before calling the nodes
        self._active_operations.put(session_id, OperationResult(operation_name, timeout, onOperationResultRoutine))

        self.__call_operation(session_id, nodes, operation_name, parameters_map)

        return (session_id, 0, 'Operation %s is called on nodes' % (operation_name,))
    except Exception as err:
        # log under the real method name so the entry can be found by grep
        logger.error('callOperationOnNodes error: %s' % err)

        # roll back the session if it was created before the failure
        if session_id:
            self._active_operations.delete(session_id)
            self.__delete_session(session_id)

        return (session_id, 1, str(err))
def callOperationOnCluster(self, user_name, cluster_name, operation_name, parameters_map, onOperationResultRoutine):
    '''
    call operation on the nodes of one cluster

    Nodes are taken from NM_NODE joined with NM_CLUSTER: they must belong to
    the cluster, have admin_status other than NAS_NOTACTIVE and
    current_state equal to NCS_UP.

    @user_name (string) name of user who is calling operation (nm_user.name)
    @cluster_name (string) name of cluster (nm_cluster.cluster_sid)
    @operation_name (string) name of operation (nm_operation.name)
    @parameters_map (dict {param_name, param_value}) operation input parameters
    @onOperationResultRoutine (func) callback routine for receiving operation results
        This routine should have the following spec:
            (operation_name, session_id, status, ret_params_map), where
            operation_name (string) - name of operation
            session_id (integer) - identifier of session
            status (integer) - status of operation
            ret_params_map (dict) - {<node_hostname>: { <param_name> : <param_value>, ...}, ...}

    @return (session_id, ret_code, ret_message)
        ret_code is 0 on success and 1 on error (ret_message then holds the
        text of the exception)
    '''
    try:
        session_id = 0
        logger.info('CALL operation %s on cluster %s'%(operation_name, cluster_name))

        operation_id, node_type_id, timeout = self.__get_operation_info(operation_name)

        #select all ACTIVE nodes in cluster
        query = "SELECT N.id, N.hostname, N.node_type FROM NM_NODE N, NM_CLUSTER C \
                    WHERE C.id = N.cluster_id AND C.cluster_sid=%s AND N.admin_status<>%s\
                    AND N.current_state=%s"
        active_rows = self._dbconn.select(query, (cluster_name, NAS_NOTACTIVE, NCS_UP))
        if not active_rows:
            raise Exception('No active nodes found for cluster with name %s' % cluster_name)

        target_nodes = self.__form_nodes(active_rows, node_type_id)
        if not target_nodes:
            raise Exception('Not found nodes for operation %s in cluster %s.'%(operation_name, cluster_name))

        session_id = self.__insert_operation_into_db(operation_id, user_name, target_nodes)

        # remember the pending result, then dispatch the call to the nodes
        pending_result = OperationResult(operation_name, timeout, onOperationResultRoutine)
        self._active_operations.put(session_id, pending_result)
        self.__call_operation(session_id, target_nodes, operation_name, parameters_map)

        success_message = 'Operation %s is called on cluster %s' % (operation_name, cluster_name)
        return (session_id, 0, success_message)
    except Exception as failure:
        logger.error('callOperationOnCluster error: %s' % failure)

        # drop the session again if it had already been created
        if session_id:
            self._active_operations.delete(session_id)
            self.__delete_session(session_id)

        return (session_id, 1, str(failure))
def _set_new_hostname(self, uuid):
    """Choose the node hostname, apply it and return it.

    The hostname returned by __get_cmdline_hostname() is used when there is
    one; otherwise 'NODE-<last dash-separated part of uuid>' is used.
    The name is applied with the `hostname` command, then
    `dhcpcd --rebind --waitip eth0` is run, syslog-ng is reloaded and
    ntp-client is restarted.

    Raises Exception when the dhcpcd command returns a non-zero code; the
    results of the other commands are not checked.
    """
    hostname = self.__get_cmdline_hostname()
    if hostname is not None:
        logger.info('Hostname specified by kernel command line: %s'%hostname)
    else:
        logger.info('Setting defaut hostname')
        uuid_tail = uuid.rsplit('-', 1)[-1]
        hostname = 'NODE-%s' % uuid_tail

    run_command(['hostname', hostname])

    dhcp_status, _dhcp_out, dhcp_err = run_command(['dhcpcd','--rebind', '--waitip', 'eth0'])
    if dhcp_status:
        raise Exception('dhcpcd eth0 error: %s'%dhcp_err)

    # services run after the rename, in the same order as before
    follow_up_commands = (
        ['/etc/init.d/syslog-ng', 'reload'],
        ['/etc/init.d/ntp-client', 'restart'],
    )
    for command in follow_up_commands:
        run_command(command)

    return hostname
def run(self): logger.info('NodesMonitor started!') while not self.__stoped: try: t0_point = datetime.now() rows = self.__dbconn.select("SELECT N.hostname, N.current_state FROM nm_node N, nm_cluster C \ WHERE N.cluster_id=C.id AND N.admin_status=%s AND C.status=%s", (ANS_ACTIVE, CS_ACTIVE)) logger.debug('NodesMonitor: Selected %i nodes for checking state'%len(rows)) for row in rows: self.__nodes_queue.put((row[0], row[1])) self.__nodes_queue.join() except Exception, err: logger.error('NodesMonitor failed: %s' % err) finally:
def run(self): logger.info('%s started!'%self.getName()) while True: session_id = None node = None ret_params_map = {} ret_code = RC_OK ret_message = '' try: sock = self.queue.get() if sock == STOP_THREAD_EVENT: logger.info('%s stopped!'%self.getName()) break data = '' while True: received = sock.recv(BUF_SIZE) if not received: break data += received if len(received) < BUF_SIZE: break logger.debug('%s receive: %s'%(self.getName(),data)) if not data: raise Exception('empty data block') json_object = json.loads(data) self.__onProcessResult(json_object) except Exception, err: ret_message = '%s error: %s' % (self.getName(), err) ret_code = RC_ERROR logger.error(ret_message) finally:
def run(self):
    """Collect the node description and send it as a boot event.

    Gathers uuid, hostname, network interface data, the created user's
    credentials and hardware information, then keeps calling
    MANAGEMENT_SERVER on LISTENER_PORT until a call returns code 0 or
    self.stoped becomes true, sleeping SLEEP_SENDER_TIME after each failure.

    Returns the code of the last call (0 on success), or None when no call
    was made or when an exception occurred (the exception is logged).
    """
    try:
        self.stoped = False

        self._remount_devfs()#FIXME

        uuid = self._get_uuid()
        hostname = self._set_new_hostname(uuid)
        mac_address, ip_address = self._get_interface_info()
        login, password = self._create_user()
        processor, memory = self._get_hardware_info()

        caller = friBase.FriCaller()
        packet = {  'uuid': uuid,
                    'hostname': hostname,
                    'ip_address': ip_address,
                    'mac_address': mac_address,
                    'login': login,
                    'password': password,
                    'processor': processor,
                    'memory': memory }

        # the packet carries the user's password: log a masked copy only
        logged_packet = dict(packet, password='***')
        logger.info('BOOT EVENT: %s' % (logged_packet,))

        # defined up front so that `return code` is valid even when the
        # sender is stopped before the first call is made
        code = None
        while not self.stoped:
            code, msg = caller.call(MANAGEMENT_SERVER, packet, LISTENER_PORT)
            if code == 0:
                break

            logger.error('Boot event send error: %s'%msg)
            time.sleep(SLEEP_SENDER_TIME)

        return code
    except Exception as err:
        logger.error("Boot event sender error: %s"%err)
def beforeCall(self, operation, call_object, parameters):
    """Validate the call target and add node data to `parameters`.

    Raises Exception when the target is a cluster or more than one node.
    When the node is not found joined with its cluster and node type, the
    message is logged and (22, message) is returned. Otherwise `parameters`
    is updated in place with logic_name, arch, node_type, cluster_sid and
    the selected config parameters, and nothing is returned.
    """
    targets_cluster = call_object.object == CLUSTER
    if targets_cluster or len(call_object.object_value) > 1:
        raise Exception('Synchronize operation should be runned on one node only!')

    node = call_object.object_value[0]

    node_rows = self.dbConn.select('SELECT N.id, N.logic_name, N.architecture, NT.type_sid, \
                        C.cluster_sid FROM nm_node N, nm_node_type NT, nm_cluster C\
                        WHERE C.id=N.cluster_id AND NT.id=N.node_type AND N.hostname=%s', (node,))
    if not node_rows:
        msg = 'Node %s shoud be binded with cluster and type for call sync operation'%node
        logger.info(msg)
        return 22, msg

    node_id, logic_name, arch, type_name, cluster_sid = node_rows[0]

    #select node config parameters
    # NOTE(review): the WHERE clause shows no condition linking nm_config_spec
    # to nm_config - confirm that the missing join is intended.
    config_rows = self.dbConn.select('SELECT CS.parameter_name, C.parameter_value \
                        FROM nm_config_spec CS, nm_config C \
                        WHERE CS.object_type_id=%s AND C.object_id=%s', (NODE_CONFIG_TYPE, node_id ))

    # collect everything first so `parameters` is updated in a single step
    extra_params = dict(logic_name=logic_name, arch=arch,
                        node_type=type_name, cluster_sid=cluster_sid)
    for param_name, param_value in config_rows:
        extra_params[param_name] = param_value

    parameters.update(extra_params)
finally: #calculate timeout dt = datetime.now() - t0_point wait_time = timedelta(0, self.__monitor_timeout) - dt if wait_time.days == 0: #sleep cycle for i in range(wait_time.seconds): #check daemon state every second if self.__stoped: break time.sleep(1.0) time.sleep(wait_time.microseconds * 0.000001) logger.info('NodesMonitor stoped!') class MonitorWorkerThread(threading.Thread): def __init__(self, queue, wait_timeout): self.queue = queue self.wait_timeout = wait_timeout self.dbconn = DatabaseConnection() threading.Thread.__init__(self) def run(self): logger.info('%s started!'%self.getName()) fri_caller = FriCaller()
client = DBUSInterfaceClient() return client except ImportError, err: logger.warning('Boot manager require nodes manager for automatic changing hostname.') return None #-------------------------------------------------------------------------------- if __name__ == '__main__': try: logger.info('Boot event listener starting...') listener = BootEventListener() def stop(s, p): global listener try: logger.info('Boot event listener stoping...') listener.stop() except Exception, err: logger.error('Stoping boot event listener error: %s'%err) signal.signal(signal.SIGINT, stop) listener.start()
def run(self):
    """Start self.server, logging before the call and after it returns."""
    logger.info('FriListenerThread started!')

    server = self.server
    server.start()

    # reached only once start() has returned
    logger.info('FriListenerThread stopped!')
if not in_line: #EOF occured break host, facility, priority, level, tag, program, isodate, msg = in_line.split('[-]') host = host.strip() node_id = NODES.get(host, None) if node_id is None: rows = dbconn.select("SELECT id FROM nm_node WHERE hostname=%s",(host,)) if rows: NODES[host] = rows[0][0] node_id = rows[0][0] dbconn.modify("INSERT INTO logs (node_id, host, facility, priority, level, tag, program, log_timestamp, msg) \ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)", (node_id, host, facility.strip(), priority.strip(), level.strip(), tag.strip(), program.strip(), isodate.strip(), msg.strip())) if __name__ == "__main__": try: writer_loop() logger.info('Log writer exiting...') except Exception, ex: logger.error("log writer main loop caught exception: %s" % ex) sys.exit(1)