def __init__(self, master, websocket, id):
    """
    Bind this client to its owning master, its websocket and its identifier.

    :param master: Object stored as ``self.master``
    :param websocket: Websocket stored as ``self.websocket``
    :param id: Identifier stored as ``self.id``
    """
    self.id = id
    self.websocket = websocket
    self.master = master

    # Attribute-style messaging helper built on top of send_data:
    #   client.send.<action>(data)
    self.send = Sender(self.send_data)
def __init__(self, context):
    """
    Open the master's slave-facing sockets and build the broadcast helpers.

    :param context: Object whose ``socket()`` method creates the PULL and
        PUB sockets used below; also kept on ``self.context``
    """
    log.info("Master initialized")
    self.context = context

    # Inbound channel: slaves push their messages to this PULL socket
    pull_address = "tcp://*:{}".format(SLAVE_MASTER_PORT)
    self.slave_in = context.socket(zmq.PULL)
    self.slave_in.bind(pull_address)

    # Anything arriving on slave_in is consumed by listen_to_slaves
    gevent.spawn(self.listen_to_slaves)

    # Outbound channel: a PUB socket, so every message goes to all slaves.
    # TODO: Selective slave message sending
    pub_address = "tcp://*:{}".format(SLAVE_PUB_PORT)
    self.slave_out = context.socket(zmq.PUB)
    self.slave_out.bind(pub_address)

    # Attribute-style helpers: self.slaves.<action>(data) and
    # self.clients.<action>(data) (clients are browsers on a websocket)
    self.slaves = Sender(self.broadcast_data_to_slaves)
    self.clients = Sender(self.broadcast_data_to_clients)
def __init__(self, context):
    """
    Connect this slave to the master and prepare its identity.

    :param context: Object whose ``socket()`` method creates the PUSH and
        SUB sockets used below; also kept on ``self.context``
    """
    self.context = context

    # This socket sends messages to master
    self.socket_out = context.socket(zmq.PUSH)
    self.socket_out.connect("tcp://localhost:{}".format(SLAVE_MASTER_PORT))

    # Syntax allowing self.master.<action>(data)
    self.master = Sender(self.send_to_master)

    # This socket receives broadcasts from master
    self.socket_in = context.socket(zmq.SUB)
    # Subscribe to everything. The filter must be bytes: pyzmq rejects text
    # strings for this option on Python 3, and b"" is identical to "" on
    # Python 2.
    self.socket_in.setsockopt(zmq.SUBSCRIBE, b"")
    self.socket_in.connect("tcp://localhost:{}".format(SLAVE_PUB_PORT))

    # "<hostname>:<sink port>", compared against sink_host in set_sink messages
    self.sink_host = '{}:{}'.format(self.hostname(), SLAVE_SINK_PORT)
    # Random identifier reported to master as slave_uuid
    self.uuid = uuid4().hex
class Slave(Actionable):
    """
    This class manages connections to the master and handles client-server logistics.
    Actions on it are run by master.
    """
    # Class-level defaults. The mutable ones (running_tests, client_tests) are
    # kept here for backward compatibility but are rebound per instance in
    # __init__ so instances never share state through them.
    id = None
    last_bw_out = None
    bw_out = 0
    total_bw_out = 0
    last_bw_in = None
    bw_in = 0
    total_bw_in = 0
    running_tests = []
    test_runner = None
    sink_obj = None
    sink = None
    client_tests = {}
    message_lock = BoundedSemaphore(1)

    def __init__(self, context):
        """
        Connect this slave to the master and prepare its identity.

        :param context: Object whose ``socket()`` method creates the PUSH and
            SUB sockets used below; also kept on ``self.context``
        """
        self.context = context

        # Per-instance containers (the class-level ones would be shared)
        self.running_tests = []
        self.client_tests = {}

        # This socket sends messages to master
        self.socket_out = context.socket(zmq.PUSH)
        self.socket_out.connect("tcp://localhost:{}".format(SLAVE_MASTER_PORT))

        # Syntax allowing self.master.<action>(data)
        self.master = Sender(self.send_to_master)

        # This socket receives broadcasts from master
        self.socket_in = context.socket(zmq.SUB)
        # Subscribe to everything. The filter must be bytes: pyzmq rejects
        # text strings for this option on Python 3, and b"" is identical to
        # "" on Python 2.
        self.socket_in.setsockopt(zmq.SUBSCRIBE, b"")
        self.socket_in.connect("tcp://localhost:{}".format(SLAVE_PUB_PORT))

        # "<hostname>:<sink port>", compared against sink_host in set_sink
        self.sink_host = '{}:{}'.format(self.hostname(), SLAVE_SINK_PORT)
        # Random identifier reported to master as slave_uuid
        self.uuid = uuid4().hex

    def send_to_master(self, data):
        """
        Sends a bson serialized message to master, inserting slave specific
        information such as id

        :param data: Message data to send
            action: (required) RPC to call on serverside slave instance
        """
        if self.id:
            data['slave_id'] = self.id

        # Heartbeats are periodic, so they are kept out of the debug log
        is_heartbeat = data['action'] == 'heartbeat'
        if not is_heartbeat:
            log.debug('MASTER->{}'.format(data['action']))
        self.socket_out.send(bson.dumps(data))
        if not is_heartbeat:
            log.debug('MASTER->{} end'.format(data['action']))

    def send_to_sink(self, data):
        """
        Send data to the sink for collation

        :param data: Message data to send
        """
        if self.id:
            data['slave_id'] = self.id
        if self.sink_obj:
            self.sink_obj.send_to_sink(data)
        else:
            log.warn('{}->SINK failed, no sink'.format(data['action']))

    def listen_to_master(self):
        """
        Wait for messages from the server and delegate them. Never returns.
        """
        log.info("Listening to master")
        while True:
            self.handle_master_message(self.socket_in.recv())

    def handle_master_message(self, msg):
        """
        Decode a message from the server, if it matches our id or has no id
        decode and run it.

        :param msg: BSON encoded message from the server
            action: Local function to run
            slave_id: If present and not our id ignore message
        """
        data = bson.loads(msg)
        if 'action' not in data:
            log.warn('Server sent message with no action')
            return

        # Messages are broadcast to every slave; only act on those addressed
        # to everyone (no slave_id) or to us specifically
        if 'slave_id' not in data or data['slave_id'] == self.id:
            log.debug('{}<-MASTER'.format(data['action']))
            self.run_action(data)
            log.debug('{}<-MASTER end'.format(data['action']))

    def heartbeat(self):
        """
        Generate a status report and send it to master

        The report contains hostname, memory, load, bandwidth, a timestamp,
        the current sink id (if any) and this slave's uuid. CPU load is
        sampled with ``interval=1``.
        """
        # physical_memory = psutil.phymem_usage()
        virtual_memory = psutil.virtual_memory()
        swap_memory = psutil.swap_memory()
        load = psutil.cpu_percent(interval=1)
        network = psutil.net_io_counters()
        hostname = self.hostname()

        # First beat: no previous sample, so the measured delta is zero.
        # Compare against None explicitly; a counter value of 0 is a valid
        # previous sample and must not be treated as "unset".
        if self.last_bw_out is None:
            self.last_bw_out = network.bytes_sent
        self.bw_out = (network.bytes_sent - self.last_bw_out)/HEARTBEAT_PERIOD
        self.total_bw_out += self.bw_out * HEARTBEAT_PERIOD
        self.last_bw_out = network.bytes_sent

        if self.last_bw_in is None:
            self.last_bw_in = network.bytes_recv
        self.bw_in = (network.bytes_recv - self.last_bw_in)/HEARTBEAT_PERIOD
        self.total_bw_in += self.bw_in * HEARTBEAT_PERIOD
        self.last_bw_in = network.bytes_recv

        self.master.heartbeat({
            'hostname': hostname,
            'memory': {
                # 'physical': (physical_memory.used, physical_memory.total),
                'physical': (0, 0),
                'virtual': (virtual_memory.used, virtual_memory.total),
                'swap': (swap_memory.used, swap_memory.total)
            },
            'load': load,
            'bandwidth': {
                'in': (self.bw_in, self.total_bw_in),
                'out': (self.bw_out, self.total_bw_out)
            },
            'generated': int(time()),
            'sink_id': self.sink_obj.id if self.sink_obj else None,
            'slave_uuid': self.uuid
        })

    def heartbeat_forever(self):
        """
        Send a heartbeat every HEARTBEAT_PERIOD. Never returns.
        """
        log.info('Beating heart forever')
        while True:
            self.heartbeat()
            gevent.sleep(HEARTBEAT_PERIOD)

    @action
    def set_id(self, data):
        """
        Set id if it matches our uuid

        :param data: Message specific data:
            slave_uuid: UUID to check against ours
            new_slave_id: Id to set if match
        """
        if 'slave_uuid' not in data or 'new_slave_id' not in data:
            log.warn('receiving id without slave_uuid/new_slave_id')
            return
        if data['slave_uuid'] == self.uuid:
            self.id = data['new_slave_id']
            log.info('Received id: {}'.format(self.id))

    @action
    def set_sink(self, data):
        """
        Set sink

        Only acts when the message carries both keys and its sink_host equals
        this slave's own sink_host; otherwise a warning is logged.

        :param data: Message specific data:
            sink_id: Sink id to connect to
            sink_host: Host to connect to
        :return:
        """
        if 'sink_id' in data and 'sink_host' in data and data['sink_host'] == self.sink_host:
            self.connect_to_sink(data['sink_id'], data['sink_host'])
        else:
            log.warn('Incomplete sink call')

    @staticmethod
    def hostname():
        """
        :return: The local machine's hostname as reported by the socket module
        """
        return socket.gethostname()

    def connect_to_sink(self, sink_id, host):
        """
        Attempt to connect to the given sink, and create a local sink instance

        :param sink_id: ID to assign to the sink
        :param host: host to connect to
        """
        log.info('Attempting to connect to sink({}) - {}'.format(sink_id, host))
        if self.sink_obj:
            log.info('Closing previous sink')
            self.sink_obj.close()

        # Create our sink
        self.sink_obj = Sink(id=sink_id, host=host, slave=self)

        # Initialize it, and tell master if successful
        if self.sink_obj.setup():
            log.info('Ok!')
            # RPC for sink
            self.sink = Sender(self.send_to_sink)
            self.master.connected_to_sink({
                'sink_id': self.sink_obj.id
            })

    @action
    def quit(self, data):
        """
        If this RPC is run master told this slave to quit
        """
        log.info('Master told us to quit, quitting.')
        sys.exit(0)

    @action
    def run_test(self, data):
        """
        Receive a test, instantiate it and run

        :param data: Message specific data:
            client_id: Client that started the test
            test: Test data to hand off to the test
        :return:
        """
        log.info('slave({}).run_test client({})'.format(self.id, data['client_id']))
        if not self.sink_obj:
            log.warn('Error, no sink')
            return
        if 'test' not in data:
            log.warn('No test given')
            return

        client_id = data['client_id']
        test = Test(self.sink, data)

        # Only allow one test at a time (per client)
        if client_id in self.client_tests:
            log.info('Stopping previous test')
            self.client_tests[client_id].stop()
        self.client_tests[client_id] = test
        test.run()

    @action
    def stop_test(self, data):
        """
        Stop any tests from that client

        If this slave has no test for the client, master is told the test
        is stopped instead.

        :param data: Message specific data:
            client_id: Client to stop tests for
        """
        client_id = data['client_id']
        if client_id in self.client_tests:
            log.info('slave({}).stop_test client({})'.format(self.id, client_id))
            self.client_tests[client_id].stop()
            del self.client_tests[client_id]
        else:
            self.master.test_stopped()
class Client(Actionable):
    """
    This class is the local representation of a remote client (browser)
    When a websocket message is received it is routed into the proper local instance
    When the master sends a message to a client it goes through this class
    """

    def __init__(self, master, websocket, id):
        """
        :param master: Object stored as ``self.master``
        :param websocket: Websocket used to talk to the browser
        :param id: Identifier of this client
        """
        self.master = master
        self.websocket = websocket
        self.id = id

        # This is syntax for sending message through a websocket.
        # client.send.<action>(data)
        self.send = Sender(self.send_data)

    def send_data(self, data):
        """
        Send data to the browser as a json dump

        :param data: Data to serialize
        """
        self.websocket.send(json.dumps(data))

    def handle_websocket_message(self, msg):
        """
        This handles any message that comes in by looking at the action key and
        seeing if it is a local method, then calling it

        Empty messages are ignored. Messages that are not valid JSON are
        answered with an error instead of raising, so one bad frame from the
        browser does not break the connection's receive loop.

        :param msg: JSON encoded data from the websocket
        """
        if not msg:
            return
        try:
            data = json.loads(msg)
        except ValueError:
            return self.send.error({"error": "Malformed message"})

        # Only JSON objects can carry an action
        if isinstance(data, dict) and "action" in data:
            self.run_action(data)

    @staticmethod
    def _test_path(name):
        """
        Build the path of the stored test called ``name``.

        The name comes from the browser, so it is only accepted when it is a
        bare file name: non-empty, a string, and free of directory components
        or NUL bytes. This keeps every access inside TEST_DIR.

        :param name: Test name without the ".json" extension
        :return: Path to the test file, or None if the name is not acceptable
        """
        try:
            leaf = os.path.basename(name)
        except (TypeError, AttributeError):
            # Not a string at all (missing key, number, list, ...)
            return None
        if not name or leaf != name or "\x00" in name:
            return None
        return os.path.join(os.path.dirname(__file__), TEST_DIR, name + ".json")

    @action
    def get_id(self, data):
        """
        Websocket asking for its client id
        """
        self.send.set_id({"client_id": self.id})

    @action
    def quit(self, data):
        """
        Websocket telling a slave to quit

        :param data: Message specific data:
            slave_id: slave to tell to quit
        """
        if "slave_id" in data:
            self.master.remove_slave(data["slave_id"])
        else:
            self.send.error({"error": "Id not specified in data"})

    @action
    def request_slaves(self, data):
        """
        Websocket requesting slave data
        """
        slaves = {slave.id: slave.last_beat
                  for slave in self.master.slave_registry.values()}
        # Send a message with action=receive_slaves to the browser
        self.send.receive_slaves({"slaves": slaves})

    @action
    def set_sink(self, data):
        """
        Set the sink on a particular slave, which sets the sink for all slaves on that host

        :param data: message specific data:
            slave_id: Slave to set the sink for (unused)
            sink_id: Slave to set the sink to
        :raises Exception: If sink_id or slave_id is missing
        """
        if "sink_id" not in data or "slave_id" not in data:
            raise Exception("Missing sink_id or slave_id")
        # Let the master handle actually doing this
        self.master.set_sink(data["slave_id"], data["sink_id"], self)

    @action
    def request_available_tests(self, data):
        """
        Client asking to load tests

        :param data:
        """
        tests_glob = os.path.join(os.path.dirname(__file__), TEST_DIR, "*.json")
        # Strip the directory and the ".json" extension from every match
        names = [os.path.splitext(os.path.basename(path))[0]
                 for path in glob.glob(tests_glob)]
        # Send list of files to the client
        self.send.receive_available_tests({"tests": names})

    @action
    def request_test(self, data):
        """
        Client asking to load a test

        :param data: Message specific data:
            name: filename to load
        """
        filename = self._test_path(data.get("name"))
        if filename is None or not os.path.exists(filename):
            return self.send.error({"error": "Test not found"})
        with open(filename) as test_file:
            return self.send.receive_test({"test": json.load(test_file)})

    @action
    def save_test(self, data):
        """
        Save a test to a file

        :param data: Message specific data:
            test: data representing a test
                name: Filename to save to
        """
        if "test" not in data:
            return
        test = data["test"]
        if "name" not in test:
            return

        filename = self._test_path(test["name"])
        if filename is None:
            return self.send.error({"error": "Invalid test name"})
        with open(filename, "w") as test_file:
            test_file.write(json.dumps(test))
        self.send.save_successful()

    @action
    def delete_test(self, data):
        """
        Delete a test

        :param data: Message specific data:
            test_name: Name of test to delete
        """
        if "test_name" not in data:
            return self.send.error({"error": "Cannot delete test, not found"})

        filename = self._test_path(data["test_name"])
        if filename is None or not os.path.exists(filename):
            return self.send.error({"error": "Test not found"})
        os.remove(filename)

    @action
    def run_test(self, data):
        """
        Send a test to all slaves to run

        Each slave receives an equal integer share of the requested runs.
        When no slave is registered the client gets an error instead of the
        division by zero the share computation would otherwise hit.

        :param data: Message specific data:
            test: Test data to run
        """
        if "test" not in data:
            return

        slave_count = len(self.master.slave_registry)
        if not slave_count:
            return self.send.error({"error": "No slaves available to run test"})

        data["client_id"] = self.id
        runs = 1
        if "runs" in data["test"]:
            try:
                # Each slave gets a portion of the total runs. Floor division
                # keeps the count an integer on Python 2 and Python 3 alike.
                runs = int(data["test"]["runs"]) // slave_count
            except (ValueError, TypeError):
                # Non-numeric or missing value: fall back to a single run
                runs = 1
        data["runs"] = runs
        self.master.slaves.run_test(data)

    @action
    def stop_test(self, data):
        """
        Halt all tests run by this client

        :param data:
        """
        data["client_id"] = self.id
        if self.master.slave_registry:
            self.master.slaves.stop_test(data)
        else:
            # Nobody to stop; tell the browser right away
            self.send.test_stopped()
class MasterApplication(Actionable):
    """
    This class represents the master controlling server.
    It tracks slaves, sinks, and clients.
    """
    # Class-level defaults. The containers are kept here for backward
    # compatibility but are rebound per instance in __init__ so two instances
    # never share registries.
    slave_registry = {}
    sink_registry = {}
    next_slave_id = 0
    next_client_id = 0
    client_registry = {}
    clients = Sender(None)
    removed_slaves = set()

    def __init__(self, context):
        """
        Open the slave-facing sockets and build the broadcast helpers.

        :param context: Object whose ``socket()`` method creates the PULL and
            PUB sockets used below; also kept on ``self.context``
        """
        log.info("Master initialized")
        self.context = context

        # Per-instance state (the class-level containers would be shared)
        self.slave_registry = {}
        self.sink_registry = {}
        self.client_registry = {}
        self.removed_slaves = set()

        # Slaves send data in via this socket
        self.slave_in = context.socket(zmq.PULL)
        self.slave_in.bind("tcp://*:{}".format(SLAVE_MASTER_PORT))

        # listen_to_slaves handles any incoming messages
        gevent.spawn(self.listen_to_slaves)

        # This broadcasts to all slaves simultaneously.
        # TODO: Selective slave message sending
        self.slave_out = context.socket(zmq.PUB)
        self.slave_out.bind("tcp://*:{}".format(SLAVE_PUB_PORT))

        # What a clever boy I am (clever code is bad code)
        # Allow self.slaves.<action>(data) magic
        self.slaves = Sender(self.broadcast_data_to_slaves)

        # Same for clients (where client is a browser connected via websocket)
        self.clients = Sender(self.broadcast_data_to_clients)

    def broadcast_data_to_slaves(self, data):
        """
        Send data to all slaves as a bson dump

        :param data: Data to serialize
        """
        self.slave_out.send(bson.dumps(data))

    def broadcast_data_to_clients(self, data):
        """
        Send data to all web clients (in serial) as a json dump

        :param data: Data to serialize
        """
        # TODO: parallelize
        for client in self.client_registry.values():
            client.send_data(data)

    def __call__(self, environ, start_response):
        """
        This class is called whenever a browser tries to connect.
        Note: This is green-threaded via gevent

        :param environ: The wsgi environment
        :param start_response: A helper to return http responses
        :return:
        """
        # Respond to any websocket messages
        ws = environ["wsgi.websocket"]
        self.listen_to_websocket(ws)

    def listen_to_websocket(self, ws):
        """
        Listen to incoming messages on the websocket and delegate them to a
        message handler.

        :param ws: Websocket to listen to
        """
        client = self.add_client(ws)
        log.info("Websocket {} connected!".format(client.id))

        # Listen for messages until they disconnect
        while True:
            try:
                msg = ws.receive()
                client.handle_websocket_message(msg)
            except WebSocketError:
                self.disconnect_client(client)
                break

    def get_next_client_id(self):
        """
        :return: an incrementing id to identify clients
        """
        self.next_client_id += 1
        return self.next_client_id - 1

    def add_client(self, ws):
        """
        Create a new Client instance and store it in the client registry

        :param ws: Websocket to associate with the client so the instance can send messages
        :return: The client instance
        :rtype Client:
        """
        id = self.get_next_client_id()
        client = Client(self, ws, id)
        self.client_registry[id] = client
        return client

    def disconnect_client(self, client):
        """
        Close the connection to this specific client and deregister them

        :param client: Client to disconnect
        """
        if client.id in self.client_registry:
            client.websocket.close()
            del self.client_registry[client.id]

    def listen_to_slaves(self):
        """
        Delegate any messages received from the slaves to the proper handler.
        Never returns.

        :return:
        """
        log.info("Listening to slaves")
        while True:
            self.handle_slave_message(self.slave_in.recv())

    def handle_slave_message(self, msg):
        """
        Slaves send in heartbeats and test results. Handle finding the
        associated local slave instance and interpreting the message.

        :param msg: A bson encoded dict with message data.
            action: A string representing the action being taken. Must have an
                    associated function on the local slave class with the
                    @action decorator
            slave_id: The id assigned to the slave. If this is not present the
                      slave doesn't have one and should be sent one
            <message specific data>
        """
        # TODO: Think through the security implications of letting slaves report their own ids and uuids
        data = bson.loads(msg)
        if "action" not in data:
            return

        # Slaves without a valid slave_id are unregistered and need to be created locally
        # TODO: Move slave creation into load_slave
        if "slave_id" not in data or data["slave_id"] not in self.slave_registry:
            if data["action"] != "heartbeat":
                log.error("Got non-heartbeat message from unknown slave")
                return
            # Heartbeats are typically the way slaves are discovered
            slave = self.new_slave(data)
        else:
            # Load the local instance
            slave = self.load_slave(data)

        # Explicit check rather than assert: assertions are stripped under -O,
        # and a failed one would end the listen_to_slaves loop
        if not slave:
            log.error("Could not create or load slave, dropping message")
            return

        # run_action looks up the @action method on the slave and runs it with data
        slave.run_action(data)

    def load_slave(self, data):
        """
        Inspect the message data and retrieve the associated slave

        :param data: Message data
        :return: The loaded slave, or None if it cannot be found
        :rtype Slave:
        """
        if "slave_id" not in data:
            log.warn("load_slave without slave_id")
            return
        slave_id = data["slave_id"]
        if slave_id not in self.slave_registry:
            log.warn("load_slave couldn't find slave")
            return
        return self.slave_registry[slave_id]

    def new_slave(self, heartbeat_data):
        """
        Create a slave from its heartbeat

        :param heartbeat_data: Heartbeat message; gets a slave_id added
        :return: The created Slave, or None if no sink could be picked for it
            (the slave is then not registered)
        :rtype Slave:
        """
        slave = Slave(self, self.get_next_slave_id())

        # Modify the data, providing a slave_id to run_action if it needs it
        heartbeat_data["slave_id"] = slave.id

        # Kinda a hack, we want to get the sink_host before the action is run,
        # and that requires heartbeat data, so we store it if we have it
        slave.last_beat = heartbeat_data

        # Broadcast a message to all slaves associating that uuid with the local slave_id
        self.slaves.set_id({"new_slave_id": slave.id,
                            "slave_uuid": heartbeat_data["slave_uuid"]})

        # Slaves send test data to the sink, and the sink collates it before
        # sending it to the server.
        # There is one sink per unique hostname
        sink = self.pick_sink(slave)
        if not sink:
            # pick_sink returns None when the slave has no hostname. Report
            # it instead of asserting so the caller can drop the message.
            log.error("No sink available for slave {}".format(slave.id))
            return None

        # Tell the new slave what sink to use
        self.slaves.set_sink({"sink_id": sink.id, "sink_host": sink.get_sink_host()})

        # Register the slave so it can be found again
        self.slave_registry[slave.id] = slave
        return slave

    def get_next_slave_id(self):
        """
        :return: An auto incrementing id to identify slaves
        """
        self.next_slave_id += 1
        return self.next_slave_id

    def pick_sink(self, slave):
        """
        Pick a sink on the same host as the given slave

        :param slave: Slave to find a sink for
        :return: The sink to use, or None if the slave reports no hostname
        """
        hostname = slave.get_hostname()
        if not hostname:
            return None
        # If it is a new host then it is the sink! Otherwise look up the
        # sink already registered for that hostname.
        if hostname not in self.sink_registry:
            return slave
        return self.sink_registry[hostname]

    def set_sink(self, slave_id, sink_id, client=None):
        """
        Forcefully set the sink for all slaves on a host.

        :param slave_id: Unused
        :param sink_id: The slave_id to promote to a sink
        :param client: The client that requested the sink change
        """
        # TODO: Move client error reporting into the client class
        if sink_id not in self.slave_registry:
            log.warn("set_sink to unknown sink")
            if client:
                client.send.error({"error": "Sink({}) not found".format(sink_id)})
            return

        # Look up the hostname for the promoted sink
        sink_host = self.slave_registry[sink_id].get_sink_host()
        if not sink_host:
            log.warn("sink_host not found")
            if client:
                client.send.error({"error": "Sink({}) does not have a host yet".format(sink_id)})
            return

        # Tell all slaves with the host <sink_host> to connect to the given sink
        self.slaves.set_sink({"sink_host": sink_host, "sink_id": sink_id})

    def register_sink(self, sink):
        """
        Record when a slave connects to a sink

        :param sink: Sink that has been connected to
        """
        self.sink_registry[sink.get_hostname()] = sink

    def remove_slave(self, id):
        """
        Remove, disconnect, and shut down the given slave id

        :param id: Slave to remove
        """
        if id in self.slave_registry:
            log.info("Killing slave {}".format(id))
            del self.slave_registry[id]

        # Any slave that reports to that id will quit
        # TODO: switch to slave.broadcast_to.quit()
        self.slaves.quit({"slave_id": id})
        self.clients.slave_disconnected({"slave_id": id})

    def check_slaves(self):
        """
        Iterate through all slaves and check the last time they heartbeat
        If it is too long ago remove them

        :return:
        """
        the_time = int(time())
        # remove_slave deletes from the registry, so iterate over a snapshot
        # of the ids. list() is required: on Python 3 keys() is a live view
        # and mutating the dict while iterating it raises RuntimeError.
        for slave_id in list(self.slave_registry):
            slave = self.slave_registry[slave_id]
            if the_time - slave.last_beat.get("generated", 0) > HEARTBEAT_PERIOD * BEATS_TO_KILL:
                self.remove_slave(slave_id)

    def watch_slaves(self):
        """
        This will periodically check slaves. Never returns.

        :return:
        """
        while True:
            self.check_slaves()
            gevent.sleep(HEARTBEAT_PERIOD)