def __init__(self, session_timeout_s=20, default_worker_uids=None,
             default_session_uid=0):
    # Use None instead of a mutable default argument so instances do not
    # share a single worker UID list.
    if default_worker_uids is None:
        default_worker_uids = []
    self.all_worker_uids = default_worker_uids
    self.session_timeout = session_timeout_s
    # Initially every known worker is available (note: this aliases the
    # same list as all_worker_uids).
    self.available_worker_uids = self.all_worker_uids
    self.completed_session = utils.to_named_thing({
        "session_uid": 0,
        "worker_uids": [],
        "start_time": 0.0,
        "end_time": 0.0,
        "completed_worker_uids": [],
        "work_params": {}
    })
    self.current_session = utils.to_named_thing({
        "session_uid": default_session_uid,
        "worker_uids": default_worker_uids,
        "start_time": 0.0,
        "end_time": 0.0,
        "completed_worker_uids": [],
        "work_params": {}
    })
    self.first_session_started = False
    # Rollout session for workers is initially inactive.
    self.session_active = False
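utils.to_named_thing is used throughout but never shown. A minimal sketch of such a helper, assuming it wraps dicts recursively for attribute access and that str() round-trips back to JSON (which the manager relies on when queuing and publishing sessions):

import json


class NamedThing:
    # Hypothetical stand-in for the object returned by utils.to_named_thing.
    def __init__(self, d):
        self._d = d
        for key, value in d.items():
            setattr(self, key, to_named_thing(value))

    def __str__(self):
        # Serialize back to the original JSON.
        return json.dumps(self._d)


def to_named_thing(value):
    # Wrap dicts (and dicts nested inside lists) so keys become attributes.
    if isinstance(value, dict):
        return NamedThing(value)
    if isinstance(value, list):
        return [to_named_thing(v) for v in value]
    return value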
def __init__(self, client_config, client_queue, debug=False):
    self.config = client_config
    self.queue = client_queue
    self.worker_uid = client_config.worker_uid

    # Set up the MQTT client, wire the callbacks, and connect to the broker.
    self.client = mqtt.Client()
    self.client.on_message = self.on_message
    self.client.on_connect = self.on_connect
    self.client.connect(self.config.broker_url, self.config.broker_port)
    for topic in self.config.topics:
        self.client.subscribe(topic.name, qos=1)

    # Split the configured topics by their role.
    self.listen_topics = [
        e.name for e in self.config.topics if e.action == "listen"
    ]
    self.register_topic = [
        e.name for e in self.config.topics if e.action == "register"
    ][0]
    self.publish_topic = [
        e.name for e in self.config.topics if e.action == "publish"
    ][0]

    # Payload published to the register topic to announce this worker.
    registration_info_dict = {
        "worker_uid": self.worker_uid
    }
    self.registration_info = utils.to_named_thing(registration_info_dict)
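The on_connect and on_message handlers wired up above are not included in this snippet. A minimal sketch, assuming the client simply forwards payloads from its listen topics onto the queue it was given (both would live as methods on the same client class):

def on_connect(self, client, userdata, flags, rc):
    # paho-mqtt calls this once the broker acknowledges the connection.
    print("Connected to broker with result code", rc)

def on_message(self, client, userdata, message):
    # Forward JSON payloads from listened topics to the owning process.
    if message.topic in self.listen_topics:
        self.queue.put(message.payload.decode("utf-8"))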
def manager_process(manager_client, worker_msg_queue, reg_queue,
                    trainer_in_queue, trainer_out_queue):
    # UIDs of workers in the current active work session.
    current_worker_uids = manager_client.config.worker_uids
    # UIDs of workers to include in the next work session.
    next_worker_uids = []
    # Parameters of work that has been completed, one for each worker.
    completed_work = []
    # UIDs of workers who have completed work.
    completed_worker_uids = []
    session_uids = [0]
    current_session = utils.to_named_thing({
        "session_uid": "0",
        "worker_uids": []
    })
    session_manager = SessionManager()

    while True:
        print("\n\n\nGrabbing new workers")
        new_workers = utils.extract_json_from_queue(reg_queue)
        session_manager.add_workers(new_workers)

        if session_manager.session_active:
            print("Session is active, looking for completed work")
            completed_work = utils.extract_json_from_queue(worker_msg_queue)
            if len(completed_work) > 0:
                print("\tFound some completed work:", completed_work)
                if session_manager.attempt_end_session(completed_work):
                    print("\tSuccessfully ended the session",
                          session_manager.current_session.session_uid)
                    # If all work is completed, give it to the trainer for training.
                    training_table_names = [c.table_name for c in completed_work]
                    session_request = session_manager.session_request(
                        training_table_names)
                    print("\t\tPutting in the session request:", session_request)
                    trainer_in_queue.put(str(session_request))
                    #session_params = session_manager.start_session()

        print("Grabbing list of new sessions")
        new_work = utils.extract_json_from_queue(trainer_out_queue)
        print("all worker uids:", session_manager.all_worker_uids)
        if (len(new_work) == 0) \
                and (not session_manager.first_session_started) \
                and (len(session_manager.all_worker_uids) > 0):
            # No work yet, but workers have registered: ask the trainer to
            # kick off the first session.
            session_request = session_manager.session_request()
            print("Putting a session request into the trainer queue:", session_request)
            trainer_in_queue.put(str(session_request))
        elif len(new_work) > 0:
            print("New work is available, making new work active")
            # If new work is available for the workers, mark the work as new in
            # the session manager and publish it to the workers.
            session_manager.start_session(new_work[-1])
            manager_client.publish(str(session_manager.current_session))

        time.sleep(2)
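utils.extract_json_from_queue is also not shown. A plausible sketch, assuming the queues carry JSON strings and the helper drains whatever is currently available without blocking:

import json
import queue


def extract_json_from_queue(q):
    # Pull every message currently on the queue; each payload is expected to
    # be a JSON string and is wrapped for attribute access (see the
    # to_named_thing sketch above).
    items = []
    while True:
        try:
            raw = q.get_nowait()
        except queue.Empty:
            break
        items.append(to_named_thing(json.loads(raw)))
    return items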
def session_request(self, table_names=None):
    # Avoid a mutable default argument for the table list.
    if table_names is None:
        table_names = []
    session_request = utils.to_named_thing({
        "session_uid": self.current_session.session_uid + 1,
        "worker_uids": self.available_worker_uids,
        #"table_names": self.current_session.work_params.new_table_names
        "table_names": table_names
    })
    return session_request
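For illustration only (UIDs, table name, and the exact serialization are hypothetical): with a current session UID of 4 and workers 1 and 2 available, the request handed to the trainer would look roughly like this:

request = session_manager.session_request(table_names=["rollout_table_7"])
trainer_in_queue.put(str(request))
# -> '{"session_uid": 5, "worker_uids": [1, 2], "table_names": ["rollout_table_7"]}'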
def main(argv):
    # Default worker configuration; overridden by a JSON config file if one
    # is passed on the command line.
    config_dict = {
        "broker_url": "192.168.1.4",
        "broker_port": 1883,
        "topics": [
            {"name": "manager", "action": "listen"},
            {"name": "worker", "action": "publish"},
            {"name": "register", "action": "register"}
        ],
        "worker_uid": 1,
        "sql_hostname": "192.168.1.4",
        "sql_username": "******",
        "sql_key_loc": "sqlkey.txt",
        "sql_dbname": "XPDB"
    }

    if len(argv) > 1:
        config_filename = argv[1]
        if os.path.exists(config_filename):
            with open(config_filename, "r") as config_file:
                config_json = config_file.read()
            config_dict = json.loads(config_json)
            config_dict["worker_uid"] = int(config_dict["worker_uid"])
            config_dict["broker_port"] = int(config_dict["broker_port"])
            print("Using config dict from disk,", argv[1])

    config = utils.to_named_thing(config_dict)
    #print(json.loads(str(config)))
    spinup_worker(config)
    print("exiting main loop")
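The worker's main presumably receives sys.argv so that argv[1] can point at a JSON config file; a minimal entry-point guard, assuming this module is run directly, would be:

import sys

if __name__ == "__main__":
    # e.g. python worker.py worker_config.json  (filenames hypothetical)
    main(sys.argv)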
def main(argv):
    test_config_dict = {
        "worker_uids": [],
        "num_rollouts": 50,
        "database_name": "XPDB",
        "ftp": "None",
        "broker_url": "192.168.1.4",
        "broker_port": 1883,
        "topics": [
            {"name": "manager", "action": "publish"},
            {"name": "worker", "action": "listen"},
            {"name": "register", "action": "listen"}
        ],
        "sql_hostname": "192.168.1.4",
        "sql_username": "******",
        "sql_key_loc": "sqlkey.txt",
        "sql_dbname": "XPDB",
        "fs_hostname": "192.168.1.15",
        "fs_port": 8000
    }
    test_config = utils.to_named_thing(test_config_dict)
    spinup_server(test_config)
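spinup_server is not shown here. One plausible wiring, sketched under the assumption that it creates the queues manager_process expects, starts a manager-side MQTT client analogous to the worker client above, and then runs the manager loop (the ManagerClient name and its queue arguments are assumptions):

import multiprocessing as mp


def spinup_server(config):
    # Queues linking the MQTT callbacks, the manager loop, and the trainer.
    worker_msg_queue = mp.Queue()   # completed-work reports from workers
    reg_queue = mp.Queue()          # registration messages from new workers
    trainer_in_queue = mp.Queue()   # session requests for the trainer
    trainer_out_queue = mp.Queue()  # new work produced by the trainer

    # Assumed manager-side counterpart of the worker client shown earlier,
    # routing listened topics into the appropriate queues.
    manager_client = ManagerClient(config, worker_msg_queue, reg_queue)
    manager_client.client.loop_start()

    manager_process(manager_client, worker_msg_queue, reg_queue,
                    trainer_in_queue, trainer_out_queue)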