def main(name, heartbeat_port, server_port, task_control_module):
    """ Slave controller main program

    Starts the RPC command server in a background daemon thread and then
    sends a heartbeat carrying ``config.state`` once per second. The
    heartbeat loop has no exit condition, so this function does not return.

    Parameters:
        name - The name of the slave as known to the master controller. This
               name is included in the heartbeat messages.
        heartbeat_port - The TCP port to send heartbeat messages to.
        server_port - The TCP port the command server binds to.
        task_control_module - The name of the module (in sip_slave) to load
               the task load and unload functions from.
    """
    logger.info('Slave controller "{}" starting'.format(name))

    # Define the module that the task load and unload functions will be
    # loaded from
    config.task_control_module = task_control_module

    # Import the slave service only now.
    # NOTE(review): presumably the service relies on
    # config.task_control_module already being set - keep this import after
    # the assignment above.
    from sip_slave.slave_service import SlaveService

    # Create a heartbeat sender to MC
    heartbeat_sender = heartbeat.Sender(name, heartbeat_port)

    # Create the RPC server and run it in a daemon thread so that it does not
    # keep the process alive on its own. The ``daemon`` attribute replaces
    # the deprecated ``setDaemon()`` call.
    server = ThreadedServer(SlaveService, port=server_port)
    t = threading.Thread(target=server.start)
    t.daemon = True
    t.start()

    # Send heartbeats
    while True:
        heartbeat_sender.send(config.state)
        time.sleep(1)
def run(self):
    """ Log that the slave is exiting, wait briefly and end the process """
    logger.info('slave exiting')

    # Pause so that the rpc has time to return
    grace_period_sec = 1
    time.sleep(grace_period_sec)

    # Terminate the application immediately
    exit_status = 0
    os._exit(exit_status)
def run(self):
    """ Thread run routine

    Unloads the task of every slave whose state machine reports the
    'Busy' state, logging before and after the pass.
    """
    logger.info('starting unconfiguration')

    # Only the status entries are needed, not the slave names
    for status in config.slave_status.values():
        if status['state'].current_state() != 'Busy':
            continue

        # The slave's type selects its entry in the slave configuration
        slave_type = status['type']
        task_control.unload(config.slave_config[slave_type], status)

    logger.info('unconfigure done')
def run(self):
    """ Thread run routine

    Starts every task in the slave configuration whose entry has a true
    'online' value.
    """
    logger.info('starting configuration')

    # Walk the slave map; entries without 'online' (or with it false) are
    # not required for the system to be online and are skipped
    for task_name, task_cfg in config.slave_config.items():
        if not task_cfg.get('online', False):
            continue
        slave_control.start(task_name, task_name)
def unload(task):
    """ Unload the task

    Clears the poller run flag, kills the task's sub-process and sets the
    controller state back to 'idle'. Only ``task[0]`` is read, for the log
    message.
    """
    task_name = task[0]
    logger.info('unloading task {}'.format(task_name))

    # Clearing the flag stops the heartbeat poller
    config.poller_run = False

    # Terminate the sub-process
    config.subproc.kill()

    # The controller is idle again
    config.state = 'idle'
def run():
    """ Send a heartbeat once per second while stepping through the states

    Command line arguments (both optional):
        argv[1] - port number passed to the heartbeat sender (default 6477)
        argv[2] - run time in seconds (default 60)

    The state is 'starting' for the first 10 seconds, 'busy' after that and
    'finished' once more than run time + 10 seconds have elapsed; the loop
    ends after the 'finished' heartbeat has been sent.
    """
    # Read the port number and the run time
    argv = sys.argv
    port = int(argv[1]) if len(argv) >= 2 else 6477
    run_time = int(argv[2]) if len(argv) >= 3 else 60

    # Install handler to respond to SIGTERM
    signal.signal(signal.SIGTERM, _sig_handler)

    # The sender is created with the process name and the port
    process_name = 'exec_eng'
    process_sender = heartbeat_task.Sender(process_name, port)

    # The state of the task
    state = 'starting'
    start_time = time.time()
    while state != 'finished':

        # Timestamp that prefixes the heartbeat message
        stamp = datetime.datetime.fromtimestamp(time.time()).strftime(
            '%Y-%m-%d %H:%M:%S')

        # Move the state on according to the elapsed time
        if time.time() - start_time > run_time + 10.0:
            logger.info('exec engine has finished')
            state = 'finished'
        elif time.time() - start_time > 10.0 and state == 'starting':
            logger.info('exec engine now busy')
            state = 'busy'

        # Append the current state and send the message to the socket
        message = ''.join([stamp, ' State: ', state, ' Component: '])
        process_sender.send(message)

        # Wait 1 sec
        time.sleep(1)
def _start_docker_slave(name, cfg, status):
    """ Start a slave controller that is a Docker container

    Reads 'engine_url', 'docker_image' and 'task_control_module' from cfg,
    allocates a host and two TCP ports for the slave, creates and starts
    the container and records the connection details in status.

    NB This only works on localhost
    """
    # Improve logging soon!
    logging.getLogger('requests').setLevel(logging.DEBUG)

    # Docker client for the configured engine
    docker = Client(version='1.21', base_url=cfg['engine_url'])

    # Allocate a host for the slave; the returned value is not used here
    config.resource.allocate_host(name, {'launch_protocol': 'docker'}, {})

    docker_image = cfg['docker_image']
    hb_port = config.resource.allocate_resource(name, "tcp_port")
    rpc = config.resource.allocate_resource(name, "tcp_port")
    control_module = cfg['task_control_module']

    # The logger address is the IPv4 address of the docker0 interface
    docker0 = netifaces.ifaddresses('docker0')
    logger_address = docker0[netifaces.AF_INET][0]['addr']

    # Command line that the container runs
    slave_command = [
        '/home/sdp/integration-prototype/slave/bin/slave',
        name,
        str(hb_port),
        str(rpc),
        logger_address,
        control_module,
    ]

    # Create the container, keep its id and start it
    container = docker.create_container(image=docker_image,
                                        command=slave_command)
    container_id = container['Id']
    docker.start(container_id)

    # Docker specific entries in the status dictionary
    details = docker.inspect_container(container_id)
    ip_address = details['NetworkSettings']['IPAddress']
    status['address'] = ip_address
    status['container_id'] = container_id

    # Generic entries
    status['rpc_port'] = rpc
    status['heartbeat_port'] = hb_port
    status['sip_root'] = '/home/sdp/integration-prototype'

    logger.info('"{}" started in container {} at {}'.format(
        name, container_id, ip_address))
def run(self):
    """ Thread run routine

    Stops every slave whose state machine is not in the '_End' state,
    sends SIGTERM to the log server and then exits the process.
    """
    logger.info('starting shutdown')

    # Stop the slaves that are still running
    for slave_name, slave_status in config.slave_status.items():
        if slave_status['state'].current_state() == '_End':
            continue
        slave_control.stop(slave_name, slave_status)

    # Terminate the log server
    logserver_pid = config.logserver.pid
    print('Terminating logserver, pid ', logserver_pid)
    os.kill(logserver_pid, signal.SIGTERM)
    logger.info('shutdown done')

    # Leave the rpc service a chance to send its reply before exiting
    time.sleep(1)
    os._exit(0)
def _start_ssh_slave(name, cfg, status):
    """ Start a slave controller that is a SSH client

    Allocates an ssh-capable host and two TCP ports for the slave, starts
    the slave program on that host with python3 as a daemonic process and
    records the connection details in status.

    Parameters:
        name - The name of the slave; also used to name its output file.
        cfg - The slave configuration; 'task_control_module' is read.
        status - The status dictionary; 'address', 'rpc_port',
                 'heartbeat_port' and 'sip_root' are filled in.

    Raises:
        Any exception raised while looking up python3 on the remote
        machine; it is logged as fatal and re-raised.
    """
    # Improve logging setup!!!
    logging.getLogger('plumbum').setLevel(logging.DEBUG)

    # Find a host that supports ssh
    host = config.resource.allocate_host(name, {'launch_protocol': 'ssh'}, {})

    # Get the root of the SIP installation on that host
    sip_root = config.resource.sip_root(host)

    # Allocate ports for heartbeat and the RPC interface
    heartbeat_port = config.resource.allocate_resource(name, "tcp_port")
    rpc_port = config.resource.allocate_resource(name, "tcp_port")

    # Get the task control module to use for this task
    task_control_module = cfg['task_control_module']

    # Get the address of the logger (as seen from the remote host)
    logger_address = _find_route_to_logger(host)

    ssh_host = SshMachine(host)

    # Without python3 the slave cannot be started. Log the problem and let
    # the original error propagate instead of carrying on with an unbound
    # ``py3`` (which would surface as an unrelated UnboundLocalError).
    try:
        py3 = ssh_host['python3']
    except Exception:
        logger.fatal('python3 not available on machine {}'.format(ssh_host))
        raise
    logger.info('python3 is available at {}'.format(py3.executable))

    # Construct the command line to start the slave
    cmd = py3[os.path.join(sip_root, 'slave/bin/slave')] \
        [name][heartbeat_port][rpc_port][logger_address][task_control_module]
    ssh_host.daemonic_popen(cmd, stdout='{}_sip.output'.format(name))

    # Fill in the status dictionary
    status['address'] = host
    status['rpc_port'] = rpc_port
    status['heartbeat_port'] = heartbeat_port
    status['sip_root'] = sip_root
    logger.info(name + ' started on ' + host)
def run(self):
    """ Poll the task's heartbeat and copy its state to the controller

    Loops for as long as config.poller_run is true, listening once per
    iteration and then sleeping for a second.
    """
    # Task states that are reported as 'busy' by the controller
    busy_states = ('starting', 'state1', 'state2')

    while config.poller_run:

        # Wait for the task's heartbeat and pull the state out of it
        message = self._heartbeat_comp_listener.listen()
        current = _get_state(message)

        # Log the message whenever the task state has changed
        if current != self._state_task_prev:
            logger.info(message)
            self._state_task_prev = current

        # Update the controller state
        config.state = 'busy' if current in busy_states else current
        time.sleep(1)
def load(task):
    """ Load the task

    Starts the task as a sub-process and starts a heartbeat poller that
    monitors it. For 'internal' tasks, monitoring means checking that the
    task is sending heartbeat messages.

    Parameters:
        task - The argument list handed to subprocess.Popen. task[0] is
               used as the task name in the log message and task[1] must
               convert to an int: the port on localhost that the heartbeat
               listener connects to.
    """
    # Extract the port number
    port = int(task[1])

    # Start a task
    logger.info('Starting task {}'.format(task[0]))
    config.subproc = subprocess.Popen(task)

    # Create a heartbeat listener to listen for a task
    timeout_msec = 1000
    heartbeat_comp_listener = heartbeat_task.Listener(timeout_msec)
    heartbeat_comp_listener.connect('localhost', port)

    # Set the run flag before starting the poller, whose loop checks it
    config.poller = _HeartbeatPoller(heartbeat_comp_listener)
    config.poller_run = True
    config.poller.start()
def on_disconnect(self):
    """ Log that the master controller client has disconnected """
    message = "master controller client controller disconnected"
    logger.info(message)
def __init__(self, sm):
    """ Log the timed-out state message for the state machine sm """
    machine_name = sm._name
    logger.info('{0} state timed-out'.format(machine_name))
def __init__(self, sm):
    """ Log the finished state message for the state machine sm """
    machine_name = sm._name
    logger.info('{0} state finished'.format(machine_name))
def __init__(self, sm):
    """ Log the online state message for the state machine sm """
    machine_name = sm._name
    logger.info('{0} state online'.format(machine_name))
def __init__(self, sm):
    """ Log the loading state message for the state machine sm """
    machine_name = sm._name
    logger.info('{0} state loading'.format(machine_name))
def __init__(self, sm):
    """ Log the transition to the degraded state; sm is not used """
    transition = 'state->degraded'
    logger.info(transition)
def testWarn(self):
    """ Emit a message at warning level

    The message was previously logged with logger.info, so the warning
    level this test is named for was never exercised.
    """
    logger.warning('A warn message')
def testInfo(self):
    """ Emit a message at info level """
    message = 'An info message'
    logger.info(message)
def __init__(self, sm):
    """ Log the transition to the unavailable state; sm is not used """
    transition = 'state->unavailable'
    logger.info(transition)
def __init__(self, sm):
    """ Log the transition to the standby state; sm is not used """
    transition = 'state->standby'
    logger.info(transition)
def testError(self):
    """ Emit a message at error level

    The message was previously logged with logger.info, so the error
    level this test is named for was never exercised.
    """
    logger.error('An error message')
def testFatal(self):
    """ Emit a message at fatal (critical) level

    The message was previously logged with logger.info, so the fatal
    level this test is named for was never exercised.
    """
    logger.fatal('A fatal message')
def __init__(self, sm):
    """ Log the transition to the unconfiguring state; sm is not used """
    transition = 'state->unconfiguring'
    logger.info(transition)
def run(self):
    """ Thread run routine

    Logs the capability named by the first element of self._args and
    starts it, passing the first two elements of self._args to
    slave_control.start.
    """
    capability = self._args[0]
    logger.info('starting capability ' + capability)

    # The second argument is only read once the start has been logged
    slave_control.start(capability, self._args[1])
def testDebug(self):
    """ Emit a message at debug level

    The message was previously logged with logger.info, so the debug
    level this test is named for was never exercised.
    """
    logger.debug('A debug message')