def dist_model(src_model, node_info, remote_path="xt_archive"):
    """
    Distribute model files to a remote node.

    Every local file matching the glob pattern ``<src_model>*`` is uploaded
    into ``/home/<user>/<remote_path>/`` on the target node.

    :param src_model: local model path (used as a glob prefix). ``None``, or a
        path whose base name is ``"none"``/``"None"``, means there is no model
        to distribute.
    :param node_info: ``(ip, user, password)`` of the target node.
    :param remote_path: directory, relative to the remote user's home under
        ``/home``, that receives the files. ``'xt_archive'`` by default.
    :return: a one-element list holding the remote model path
        (``/home/<user>/<remote_path>/<basename of src_model>``), or ``None``
        when nothing is distributed (local target node or no model).
    """
    # cheapest local-node check first: loopback never needs a transfer
    if node_info[0] in ("127.0.0.1", ):
        return None

    # init model with (none, none): nothing to send.
    # os.path.basename(None) raises TypeError, so None is tested explicitly.
    if src_model is None:
        return None
    _basename = os.path.basename(src_model)
    if _basename in ("none", "None"):
        return None

    # the target is this very host, addressed by its real ip
    if node_info[0] == get_host_ip():
        return None

    target_file = glob.glob("{}*".format(src_model))

    _ip, _user, _password = node_info
    # trailing "/" keeps the destination a directory for `put` and lets the
    # returned path be built by plain concatenation below
    destination_model = os.path.join("/home", _user, remote_path + "/")
    with Connection(_ip, user=_user,
                    connect_kwargs={"password": _password}) as connect:
        # fixme: multi-case running on the same node
        for _item in target_file:
            logging.debug("dist model: {}--> {}".format(
                _item, destination_model))
            connect.put(_item, destination_model)

    return [destination_model + _basename]
class HostnameFilter(normal_logging.Filter):
    """Logging filter that stamps every record with this host's name and ip."""

    # Both values are looked up once, when the class body is executed, and
    # are shared by every filter instance afterwards.
    hostname = platform.node()
    hostip = get_host_ip()

    def filter(self, record):
        """Attach ``hostname``/``hostip`` to *record*; never drop a record."""
        record.hostname, record.hostip = (
            HostnameFilter.hostname,
            HostnameFilter.hostip,
        )
        return True
def distribute_xt_if_need(config, remote_env, remote_path="xt_archive"):
    """
    Distribute Xingtian sourcecode among the user's configured nodes.

    Remote nodes are collected from the ``node_config`` and
    ``test_node_config`` entries of ``config``; the local host (its own ip or
    ``127.0.0.1``) is skipped. Each distinct remote node receives the packaged
    ``xingtian*.whl`` file(s) under ``/tmp`` and gets them re-installed with
    pip, with ``<remote_env["conda"]>/bin`` prepended to ``PATH``.

    :param config: config instance from config.yaml
    :param remote_env: remote environment setting; its ``"conda"`` item is the
        remote conda environment path
    :param remote_path: path to store the wheel file. 'xt_archive' default.
        Currently unused: the wheel is uploaded to ``/tmp``.
    :return: ``True`` when there is no remote node to distribute to;
        otherwise nothing is returned explicitly (``None``).
    """
    local_ip = get_host_ip()

    # check could if distribute or not
    remote_ip_list = []
    for _key in ("node_config", "test_node_config"):
        # a missing key and a key left empty in the yaml are both "no nodes"
        for _ip, _user, _password in config.get(_key) or ():
            # local need not distribute
            if _ip in (local_ip, "127.0.0.1"):
                continue
            _node = (_ip, _user, _password)
            # the same node may be listed several times (several entries on
            # one machine, or present under both keys): install only once
            if _node not in remote_ip_list:
                remote_ip_list.append(_node)

    if not remote_ip_list:
        logging.debug("Don't distribute xingtian without remote ip set.")
        return True

    # validate before spending time on packaging
    if not remote_env:
        # NOTE(review): whether execution continues past this call depends on
        # the logging backend bound to `logging` - confirm.
        logging.fatal("must assign remote env in yaml.")

    dist_path = _package_xt()

    # the wheel list does not depend on the node: resolve it once
    _workspace = "/tmp"
    target_whl = glob.glob("{}/xingtian*.whl".format(dist_path))
    logging.info("found dist: {}".format(target_whl))

    for _ip, _user, _password in remote_ip_list:
        with Connection(_ip, user=_user,
                        connect_kwargs={"password": _password}) as connect:
            for _whl in target_whl:
                _name = os.path.basename(_whl)
                _remote_cmd = "pip install {}/{} --upgrade --force-reinstall --no-deps".format(
                    _workspace, _name)
                logging.info(
                    "re-install xingtian in remote-{} conda env {} >>> \n"
                    "{}".format(_ip, remote_env["conda"], _remote_cmd))

                connect.put(os.path.join(dist_path, _name), remote=_workspace)
                # make the conda env's pip the one found first on PATH
                with connect.prefix("export PATH={}/bin:$PATH".format(
                        remote_env["conda"])):
                    connect.run(_remote_cmd, pty=False)
def launch_broker(config_info, verbosity="info"):
    """
    Launch one broker per ``node_config`` entry, locally or on a remote node.

    An entry whose ip is this host's ip or ``127.0.0.1`` is launched through
    ``launch_local_broker``; any other entry through ``launch_remote_broker``
    using the entry's user and password.

    :param config_info: configuration mapping; reads ``node_config`` (falls
        back to ``DEFAULT_NODE_CONFIG``) and ``remote_env``.
    :param verbosity: log verbosity forwarded to every launched broker.
    :return: the ``Controller`` built from a copy of the node config list.
    """
    node_config_list = config_info.get("node_config", DEFAULT_NODE_CONFIG)
    # NOTE: this function does not call start() on the controller.
    broker_controller = Controller(node_config_list.copy())

    server_port_info = broker_controller.port_info
    # port for broker client: one shared "recv" port, one "send" port per node
    train_port = server_port_info["recv"]["port"]
    predict_port = [_d["port"] for _d in server_port_info["send"]]

    server_ip = get_host_ip()
    local_ip = "127.0.0.1"
    # identical for every remote node, so look it up once
    _remote_env = config_info.get("remote_env")

    for index, data in enumerate(node_config_list):
        ip = data[0]

        if ip in (server_ip, local_ip):
            # Best effort: a failing local launch is logged and the remaining
            # nodes are still processed. Only Exception is caught, so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            try:
                launch_local_broker(index, train_port, predict_port[index],
                                    local_ip, verbosity)
            except Exception as err:
                logging.exception(err)
            else:
                logging.info("launch local broker with lib success")
            continue

        user = data[1]
        passwd = data[2]

        if not _remote_env:
            # NOTE(review): whether execution continues past this call depends
            # on the logging backend bound to `logging` - confirm.
            logging.fatal("remote node must assign conda env")

        launch_remote_broker(
            user,
            passwd,
            ip,
            server_ip,
            index,
            train_port,
            predict_port[index],
            remote_env=_remote_env,
            verbosity=verbosity,
        )

    return broker_controller
def launch_broker(config_info, start_port=None, verbosity="info"):
    """
    Start the broker master and launch one broker per ``node_config`` entry.

    An entry whose ip is this host's ip or ``127.0.0.1`` is launched through
    ``launch_local_broker``; any other entry through ``launch_remote_broker``
    using the entry's user and password.

    :param config_info: configuration mapping; reads ``node_config`` (falls
        back to ``DEFAULT_NODE_CONFIG``) and ``remote_env``.
    :param start_port: port hint handed to ``BrokerMaster``; the port actually
        used afterwards is read back from ``broker_master.start_port``.
    :param verbosity: log verbosity forwarded to every launched broker.
    :return: the started ``BrokerMaster`` instance.
    """
    node_config_list = config_info.get("node_config", DEFAULT_NODE_CONFIG)
    broker_master = BrokerMaster(node_config_list.copy(), start_port)
    broker_master.start()
    # use the master's own value from here on, whatever was passed in
    start_port = broker_master.start_port

    server_ip = get_host_ip()
    # identical for every remote node, so look it up once
    _remote_env = config_info.get("remote_env")

    for index, data in enumerate(node_config_list):
        ip = data[0]

        if ip in (server_ip, "127.0.0.1"):
            # Best effort: a failing local launch is logged and the remaining
            # nodes are still processed. Only Exception is caught, so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            try:
                launch_local_broker(index, start_port, server_ip, verbosity)
            except Exception as err:
                logging.exception(err)
            else:
                logging.info("launch local broker with lib success")
            continue

        # credentials are only needed for, and only read from, remote entries
        user = data[1]
        passwd = data[2]

        if not _remote_env:
            # NOTE(review): whether execution continues past this call depends
            # on the logging backend bound to `logging` - confirm.
            logging.fatal("remote node must assign conda env")

        launch_remote_broker(
            user,
            passwd,
            ip,
            server_ip,
            index,
            start_port,
            remote_env=_remote_env,
            verbosity=verbosity,
        )

    return broker_master