def parse_host_configs(config, config_fpath, hosts):
    """
    Parse the [ssh_hosts] section of the config and fill in the hosts dict

    config: parsed config, read through esmon_common.config_value()
    config_fpath: path of the config file, only used in log messages
    hosts: dict updated in place, maps host ID to ssh_host.SSHHost

    Returns 0 on success (also when [ssh_hosts] is absent, in which case
    hosts is left untouched), -1 when a host has no ID, no hostname, or an
    ID that is already present in hosts.
    """
    host_configs = esmon_common.config_value(config, "ssh_hosts")
    if host_configs is None:
        logging.info("can NOT find [ssh_hosts] in the config file [%s]",
                     config_fpath)
        return 0

    for host_config in host_configs:
        # Look the ID up through config_value() instead of plain indexing,
        # so that a missing key reaches the error below rather than raising
        # KeyError
        host_id = esmon_common.config_value(host_config, "host_id")
        if host_id is None:
            logging.error(
                "can NOT find [host_id] in the config of a "
                "SSH host, please correct file [%s]", config_fpath)
            return -1

        hostname = esmon_common.config_value(host_config, "hostname")
        if hostname is None:
            logging.error(
                "can NOT find [hostname] in the config of SSH host "
                "with ID [%s], please correct file [%s]", host_id,
                config_fpath)
            return -1

        # The "none" marker in the config is mapped to a real None
        mapping_dict = {esmon_common.ESMON_CONFIG_CSTR_NONE: None}
        ssh_identity_file = esmon_common.config_value(
            host_config, esmon_common.CSTR_SSH_IDENTITY_FILE,
            mapping_dict=mapping_dict)

        if host_id in hosts:
            logging.error(
                "multiple SSH hosts with the same ID [%s], please "
                "correct file [%s]", host_id, config_fpath)
            return -1

        hosts[host_id] = ssh_host.SSHHost(hostname, ssh_identity_file)
    return 0
def esmon_do_test(workspace, config, config_fpath):
    """
    Run the tests

    Steps, in order: run esmon_virt with the configured virt config file,
    parse the SSH hosts, validate the settings needed for installation,
    send the single esmon-*.iso found in the current directory to the
    install server, dump the install config under workspace, then run the
    install test (unless skipped by config) and the Lustre test.

    workspace: directory path; created on the install server with
        "mkdir -p" and also used locally to write the install config file
    config: parsed config, read through esmon_common.config_value()
    config_fpath: path of the config file, only used in log messages

    Returns 0 on success, -1 on any failure.
    """
    # pylint: disable=too-many-return-statements,too-many-locals
    # pylint: disable=too-many-branches,too-many-statements
    esmon_virt_config_fpath = esmon_common.config_value(
        config, esmon_common.CSTR_ESMON_VIRT)
    if esmon_virt_config_fpath is None:
        logging.error(
            "no [esmon_virt] is configured, "
            "please correct file [%s]", config_fpath)
        return -1

    ret = esmon_virt.esmon_virt(workspace, esmon_virt_config_fpath)
    if ret:
        logging.error("failed to install the virtual machines")
        return -1

    ssh_host_configs = esmon_common.config_value(
        config, esmon_common.CSTR_SSH_HOSTS)
    if ssh_host_configs is None:
        logging.error(
            "can NOT find [%s] in the config file, "
            "please correct file [%s]",
            esmon_common.CSTR_SSH_HOSTS, config_fpath)
        return -1

    hosts = {}
    for host_config in ssh_host_configs:
        # config_value() instead of plain indexing: a missing key must end
        # up in the error below, not in a KeyError
        host_id = esmon_common.config_value(host_config,
                                            esmon_common.CSTR_HOST_ID)
        if host_id is None:
            logging.error(
                "can NOT find [%s] in the config of a "
                "SSH host, please correct file [%s]",
                esmon_common.CSTR_HOST_ID, config_fpath)
            return -1

        hostname = esmon_common.config_value(host_config,
                                             esmon_common.CSTR_HOSTNAME)
        if hostname is None:
            logging.error(
                "can NOT find [%s] in the config of SSH host "
                "with ID [%s], please correct file [%s]",
                esmon_common.CSTR_HOSTNAME, host_id, config_fpath)
            return -1

        local = esmon_common.config_value(host_config,
                                          esmon_common.CSTR_LOCAL_HOST)
        if local is None:
            logging.debug(
                "can NOT find [%s] in the config of SSH host "
                "with ID [%s], use [false] as default value",
                esmon_common.CSTR_LOCAL_HOST, host_id)
            local = False

        # The "none" marker in the config is mapped to a real None
        mapping_dict = {esmon_common.ESMON_CONFIG_CSTR_NONE: None}
        ssh_identity_file = esmon_common.config_value(
            host_config, esmon_common.CSTR_SSH_IDENTITY_FILE,
            mapping_dict=mapping_dict)

        if host_id in hosts:
            logging.error(
                "multiple SSH hosts with the same ID [%s], please "
                "correct file [%s]", host_id, config_fpath)
            return -1
        hosts[host_id] = ssh_host.SSHHost(hostname, ssh_identity_file,
                                          local=local)

    install_server_hostid = esmon_common.config_value(
        config, esmon_common.CSTR_INSTALL_SERVER)
    if install_server_hostid is None:
        logging.error(
            "can NOT find [%s] in the config file [%s], "
            "please correct it",
            esmon_common.CSTR_INSTALL_SERVER, config_fpath)
        return -1

    if install_server_hostid not in hosts:
        logging.error(
            "SSH host with ID [%s] is NOT configured in "
            "[ssh_hosts], please correct file [%s]",
            install_server_hostid, config_fpath)
        return -1
    install_server = hosts[install_server_hostid]

    # Mandatory settings that are copied verbatim into the install config
    collect_interval = esmon_common.config_value(
        config, esmon_common.CSTR_COLLECT_INTERVAL)
    if collect_interval is None:
        logging.error(
            "can NOT find [%s] in the config file, "
            "please correct file [%s]",
            esmon_common.CSTR_COLLECT_INTERVAL, config_fpath)
        return -1

    continuous_query_periods = esmon_common.config_value(
        config, esmon_common.CSTR_CONTINUOUS_QUERY_PERIODS)
    if continuous_query_periods is None:
        logging.error(
            "can NOT find [%s] in the config file, "
            "please correct file [%s]",
            esmon_common.CSTR_CONTINUOUS_QUERY_PERIODS, config_fpath)
        return -1

    lustre_default_version = esmon_common.config_value(
        config, esmon_common.CSTR_LUSTRE_DEFAULT_VERSION)
    if lustre_default_version is None:
        logging.error("[%s] is not configured, please correct file [%s]",
                      esmon_common.CSTR_LUSTRE_DEFAULT_VERSION,
                      config_fpath)
        return -1

    # Optional settings, both default to False
    lustre_exp_ost = esmon_common.config_value(
        config, esmon_common.CSTR_LUSTRE_EXP_OST)
    if lustre_exp_ost is None:
        lustre_exp_ost = False
        logging.info(
            "[%s] is not configured, setting it to [False] by default",
            esmon_common.CSTR_LUSTRE_EXP_OST)

    lustre_exp_mdt = esmon_common.config_value(
        config, esmon_common.CSTR_LUSTRE_EXP_MDT)
    if lustre_exp_mdt is None:
        lustre_exp_mdt = False
        logging.info(
            "[%s] is not configured, setting it to [False] by default",
            esmon_common.CSTR_LUSTRE_EXP_MDT)

    server_config = esmon_common.config_value(config,
                                              esmon_common.CSTR_SERVER)
    if server_config is None:
        logging.error(
            "can NOT find [%s] in the config file, "
            "please correct file [%s]",
            esmon_common.CSTR_SERVER, config_fpath)
        return -1

    agent_configs = esmon_common.config_value(config,
                                              esmon_common.CSTR_AGENTS)
    if agent_configs is None:
        logging.error(
            "can NOT find [%s] in the config file, "
            "please correct file [%s]",
            esmon_common.CSTR_AGENTS, config_fpath)
        return -1

    ret, agents_reinstall = esmon_config.install_config_value(
        config, esmon_common.CSTR_AGENTS_REINSTALL)
    if ret:
        return -1

    # Exactly one ESMON ISO is expected in the current directory
    local_host = ssh_host.SSHHost("localhost", local=True)
    command = "ls esmon-*.iso"
    retval = local_host.sh_run(command)
    if retval.cr_exit_status:
        logging.error(
            "failed to run command [%s] on host [%s], "
            "ret = [%d], stdout = [%s], stderr = [%s]",
            command, local_host.sh_hostname, retval.cr_exit_status,
            retval.cr_stdout, retval.cr_stderr)
        return -1

    current_dir = os.getcwd()
    iso_names = retval.cr_stdout.split()
    if len(iso_names) != 1:
        logging.error(
            "found unexpected ISOs [%s] under currect directory [%s]",
            iso_names, current_dir)
        return -1

    iso_name = iso_names[0]
    iso_path = current_dir + "/" + iso_name

    command = "mkdir -p %s" % workspace
    retval = install_server.sh_run(command)
    if retval.cr_exit_status:
        logging.error(
            "failed to run command [%s] on host [%s], "
            "ret = [%d], stdout = [%s], stderr = [%s]",
            command, install_server.sh_hostname, retval.cr_exit_status,
            retval.cr_stdout, retval.cr_stderr)
        return -1

    ret = install_server.sh_send_file(iso_path, workspace)
    if ret:
        logging.error(
            "failed to send ESMON ISO [%s] on local host to "
            "directory [%s] on host [%s]",
            iso_path, workspace, install_server.sh_hostname)
        return -1

    # Path of the ISO as seen from the install server
    host_iso_path = workspace + "/" + iso_name

    install_config = {}
    install_config[esmon_common.CSTR_ISO_PATH] = host_iso_path
    install_config[esmon_common.CSTR_SSH_HOSTS] = ssh_host_configs
    install_config[esmon_common.CSTR_AGENTS_REINSTALL] = agents_reinstall
    install_config[esmon_common.CSTR_AGENTS] = agent_configs
    install_config[esmon_common.CSTR_SERVER] = server_config
    install_config[esmon_common.CSTR_COLLECT_INTERVAL] = collect_interval
    install_config[
        esmon_common.CSTR_CONTINUOUS_QUERY_PERIODS] = continuous_query_periods
    install_config[
        esmon_common.CSTR_LUSTRE_DEFAULT_VERSION] = lustre_default_version
    install_config[esmon_common.CSTR_LUSTRE_EXP_OST] = lustre_exp_ost
    install_config[esmon_common.CSTR_LUSTRE_EXP_MDT] = lustre_exp_mdt

    install_config_string = yaml.dump(install_config,
                                      default_flow_style=False)
    install_config_fpath = (workspace + "/" +
                            esmon_common.ESMON_INSTALL_CONFIG_FNAME)
    with open(install_config_fpath, "wt") as install_config_file:
        install_config_file.write(install_config_string)

    skip_install_test = esmon_common.config_value(
        config, esmon_common.CSTR_SKIP_INSTALL_TEST)
    if skip_install_test is None:
        logging.debug("no [%s] is configured, use [false] as default value",
                      esmon_common.CSTR_SKIP_INSTALL_TEST)
        skip_install_test = False

    if not skip_install_test:
        ret = esmon_test_install(workspace, install_server, host_iso_path)
        if ret:
            return -1

    ret = esmon_test_lustre(workspace, hosts, config, config_fpath,
                            install_config, install_config_fpath)
    if ret:
        logging.error("failed to test Lustre")
        return -1
    return 0
def esmon_test_lustre(workspace, hosts, config, config_fpath, install_config,
                      install_config_fpath):
    """
    Run Lustre tests

    For each configured Lustre file system: parse its MDTs, OSTs and
    clients, umount Lustre clients on the involved hosts, install the Lustre
    RPMs, format and mount the file system, enable jobstats, then push the
    test collectd config to every ESMON client, check the collected
    datapoints and the Lustre metrics, and finally push the final collectd
    config. The file system is umounted afterwards when cleanup is
    configured.

    workspace: directory path handed to the RPM preparation and to the
        ESMON install config parser
    hosts: dict mapping host ID to ssh_host.SSHHost
    config: parsed config, read through esmon_common.config_value()
    config_fpath: path of the config file, only used in log messages
    install_config: install config dict handed to
        esmon_install_nodeps.esmon_install_parse_config()
    install_config_fpath: path of the dumped install config

    Returns 0 on success, -1 on any failure.
    """
    # pylint: disable=too-many-branches,too-many-return-statements,unused-variable
    # pylint: disable=too-many-statements,too-many-locals,too-many-arguments
    lustre_rpm_dir = esmon_common.config_value(
        config, esmon_common.CSTR_LUSTRE_RPM_DIR)
    if lustre_rpm_dir is None:
        logging.error("no [%s] is configured, please correct file [%s]",
                      esmon_common.CSTR_LUSTRE_RPM_DIR, config_fpath)
        return -1

    e2fsprogs_rpm_dir = esmon_common.config_value(
        config, esmon_common.CSTR_E2FSPROGS_RPM_DIR)
    if e2fsprogs_rpm_dir is None:
        logging.error("no [%s] is configured, please correct file [%s]",
                      esmon_common.CSTR_E2FSPROGS_RPM_DIR, config_fpath)
        return -1

    cleanup = esmon_common.config_value(config, esmon_common.CSTR_CLEANUP)
    if cleanup is None:
        logging.debug("no [%s] is configured, use default value [false]",
                      esmon_common.CSTR_CLEANUP)
        cleanup = False

    lustre_rpms = lustre.LustreRPMs(lustre_rpm_dir)
    ret = lustre_rpms.lr_prepare()
    if ret:
        logging.error("failed to prepare Lustre RPMs")
        return -1

    lustre_configs = esmon_common.config_value(config,
                                               esmon_common.CSTR_LUSTRES)
    if lustre_configs is None:
        logging.error("no [%s] is configured, please correct file [%s]",
                      esmon_common.CSTR_LUSTRES, config_fpath)
        return -1

    # NOTE(review): the whole setup/check sequence is kept inside this loop
    # because lustre_fs, lustre_hosts and lazy_prepare are per-file-system
    # values - confirm against the intended flow
    for lustre_config in lustre_configs:
        # Parse general configs of Lustre file system
        fsname = esmon_common.config_value(lustre_config,
                                           esmon_common.CSTR_FSNAME)
        if fsname is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_FSNAME, config_fpath)
            return -1

        # Optional setting: fall back to False and carry on (a missing
        # value is not an error)
        lazy_prepare = esmon_common.config_value(
            lustre_config, esmon_common.CSTR_LAZY_PREPARE)
        if lazy_prepare is None:
            lazy_prepare = False
            logging.info(
                "no [%s] is configured for fs [%s], using default value false",
                esmon_common.CSTR_LAZY_PREPARE, fsname)

        lustre_fs = lustre.LustreFilesystem(fsname)

        # Parse MDT configs
        mdt_configs = esmon_common.config_value(lustre_config,
                                                esmon_common.CSTR_MDTS)
        if mdt_configs is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_MDTS, config_fpath)
            return -1

        # Hosts taking part in this file system, keyed by host ID; the
        # first LustreServerHost created for an ID is the one kept
        lustre_hosts = {}
        for mdt_config in mdt_configs:
            mdt_index = esmon_common.config_value(mdt_config,
                                                  esmon_common.CSTR_INDEX)
            if mdt_index is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_INDEX, config_fpath)
                return -1

            host_id = esmon_common.config_value(mdt_config,
                                                esmon_common.CSTR_HOST_ID)
            if host_id is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_HOST_ID, config_fpath)
                return -1

            if host_id not in hosts:
                logging.error(
                    "no host with [%s] is configured in hosts, "
                    "please correct file [%s]",
                    host_id, config_fpath)
                return -1

            device = esmon_common.config_value(mdt_config,
                                               esmon_common.CSTR_DEVICE)
            if device is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_DEVICE, config_fpath)
                return -1

            nid = esmon_common.config_value(mdt_config,
                                            esmon_common.CSTR_NID)
            if nid is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_NID, config_fpath)
                return -1

            backfs_type = esmon_common.config_value(
                mdt_config, esmon_common.CSTR_BACKFS_TYPE)
            if backfs_type is None:
                logging.debug("no [%s] is configured, use default value [%s]",
                              esmon_common.CSTR_BACKFS_TYPE, lustre.LDISKFS)
                backfs_type = lustre.LDISKFS

            is_mgs = esmon_common.config_value(mdt_config,
                                               esmon_common.CSTR_IS_MGS)
            if is_mgs is None:
                logging.debug(
                    "no [%s] is configured, use default value [False]",
                    esmon_common.CSTR_IS_MGS)
                is_mgs = False

            if is_mgs:
                # Only a single MDT may act as MGS
                if lustre_fs.lf_mgs_nid is not None:
                    logging.error(
                        "multiple MDTs with [%s] configured to "
                        "[True], please correct file [%s]",
                        esmon_common.CSTR_IS_MGS, config_fpath)
                    return -1
                lustre_fs.lf_mgs_nid = nid

            host = hosts[host_id]
            lustre_host = lustre.LustreServerHost(
                host.sh_hostname,
                identity_file=host.sh_identity_file,
                local=host.sh_local,
                host_id=host_id)
            if host_id not in lustre_hosts:
                lustre_hosts[host_id] = lustre_host

            mnt = "/mnt/%s_mdt_%s" % (fsname, mdt_index)
            lustre.LustreMDT(lustre_fs, mdt_index, lustre_host, device, mnt,
                             is_mgs=is_mgs, backfs_type=backfs_type)

        if lustre_fs.lf_mgs_nid is None:
            logging.error(
                "None MDT is configured with [%s], "
                "please correct file [%s]",
                esmon_common.CSTR_IS_MGS, config_fpath)
            return -1

        # Parse OST configs
        ost_configs = esmon_common.config_value(lustre_config,
                                                esmon_common.CSTR_OSTS)
        if ost_configs is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_OSTS, config_fpath)
            return -1

        for ost_config in ost_configs:
            ost_index = esmon_common.config_value(ost_config,
                                                  esmon_common.CSTR_INDEX)
            if ost_index is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_INDEX, config_fpath)
                return -1

            host_id = esmon_common.config_value(ost_config,
                                                esmon_common.CSTR_HOST_ID)
            if host_id is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_HOST_ID, config_fpath)
                return -1

            if host_id not in hosts:
                logging.error(
                    "no host with ID [%s] is configured in hosts, "
                    "please correct file [%s]",
                    host_id, config_fpath)
                return -1

            backfs_type = esmon_common.config_value(
                ost_config, esmon_common.CSTR_BACKFS_TYPE)
            if backfs_type is None:
                logging.debug("no [%s] is configured, use default value [%s]",
                              esmon_common.CSTR_BACKFS_TYPE, lustre.LDISKFS)
                backfs_type = lustre.LDISKFS

            device = esmon_common.config_value(ost_config,
                                               esmon_common.CSTR_DEVICE)
            if device is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_DEVICE, config_fpath)
                return -1

            host = hosts[host_id]
            lustre_host = lustre.LustreServerHost(
                host.sh_hostname,
                identity_file=host.sh_identity_file,
                local=host.sh_local,
                host_id=host_id)
            if host_id not in lustre_hosts:
                lustre_hosts[host_id] = lustre_host

            mnt = "/mnt/%s_ost_%s" % (fsname, ost_index)
            lustre.LustreOST(lustre_fs, ost_index, lustre_host, device, mnt,
                             backfs_type=backfs_type)

        # Parse client configs
        client_configs = esmon_common.config_value(lustre_config,
                                                   esmon_common.CSTR_CLIENTS)
        if client_configs is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_CLIENTS, config_fpath)
            return -1

        for client_config in client_configs:
            host_id = esmon_common.config_value(client_config,
                                                esmon_common.CSTR_HOST_ID)
            if host_id is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_HOST_ID, config_fpath)
                return -1

            if host_id not in hosts:
                logging.error(
                    "no host with [%s] is configured in hosts, "
                    "please correct file [%s]",
                    host_id, config_fpath)
                return -1

            mnt = esmon_common.config_value(client_config,
                                            esmon_common.CSTR_MNT)
            if mnt is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_MNT, config_fpath)
                return -1

            host = hosts[host_id]
            lustre_host = lustre.LustreServerHost(
                host.sh_hostname,
                identity_file=host.sh_identity_file,
                local=host.sh_local,
                host_id=host_id)
            if host_id not in lustre_hosts:
                lustre_hosts[host_id] = lustre_host
            lustre.LustreClient(lustre_fs, host, mnt)

        # Umount all clients first, so as to prevent stuck caused by
        # umounted OSTs/MDTs
        for host_id, lustre_host in lustre_hosts.items():
            logging.debug(
                "trying to umount Lustre clients on host [%s] with host_id [%s]",
                lustre_host.sh_hostname, host_id)
            ret = lustre_host.lsh_lustre_umount_services(client_only=True)
            if ret:
                # Not fatal here, the failure is only reported
                logging.info(
                    "failed to umount Lustre clients, reboot is needed")

        # Install RPMs on MDS, OSS and clients
        for host_id, lustre_host in lustre_hosts.items():
            logging.debug(
                "trying to install Lustre RPMs on host [%s] with host_id [%s]",
                lustre_host.sh_hostname, host_id)
            ret = lustre_host.lsh_lustre_prepare(workspace, lustre_rpms,
                                                 e2fsprogs_rpm_dir,
                                                 lazy_prepare=lazy_prepare)
            if ret:
                logging.error("failed to install Lustre RPMs on host [%s]",
                              lustre_host.sh_hostname)
                return -1

        ret = lustre_fs.lf_format()
        if ret:
            logging.error("failed to format file system [%s]",
                          lustre_fs.lf_fsname)
            return -1

        ret = lustre_fs.lf_mount()
        if ret:
            logging.error("failed to mount file system [%s]",
                          lustre_fs.lf_fsname)
            return -1

        # Enable jobstat
        ret = lustre_fs.lf_conf_param("sys.jobid_var=procname_uid")
        if ret:
            logging.error("failed to change jobid_var of file system [%s]",
                          lustre_fs.lf_fsname)
            return -1

        ret, esmon_server, esmon_clients = \
            esmon_install_nodeps.esmon_install_parse_config(
                workspace, install_config, install_config_fpath)
        if ret:
            logging.error("failed to parse config [%s]", config_fpath)
            return -1

        # Switch every ESMON client to the test config and verify the
        # datapoints
        for esmon_client in esmon_clients.values():
            ret = esmon_client.ec_collectd_send_config(True)
            if ret:
                logging.error(
                    "failed to send test config to esmon client on host [%s]",
                    esmon_client.ec_host.sh_hostname)
                return -1

            ret = esmon_client.ec_collectd_restart()
            if ret:
                logging.error("failed to start esmon client on host [%s]",
                              esmon_client.ec_host.sh_hostname)
                return -1

            ret = esmon_client.ec_collectd_config_test.cc_check()
            if ret:
                logging.error(
                    "Influxdb doesn't have expected datapoints from "
                    "host [%s]", esmon_client.ec_host.sh_hostname)
                return -1

        # Check the Lustre metrics of each Lustre host that is also an
        # ESMON client
        for host_id, lustre_host in lustre_hosts.items():
            esmon_client = None
            for tmp_client in esmon_clients.values():
                if tmp_client.ec_host.sh_host_id == host_id:
                    esmon_client = tmp_client
                    break

            if esmon_client is None:
                logging.info(
                    "host [%s] is not configured as ESMON client in config file [%s]",
                    lustre_host.sh_hostname, config_fpath)
                continue

            ret = lustre_host_metric_check(lustre_host, esmon_client)
            if ret:
                logging.error("failed to check Lustre metrics of "
                              "host [%s]", lustre_host.sh_hostname)
                return -1

        # Put the final (non-test) config back in place
        for esmon_client in esmon_clients.values():
            ret = esmon_client.ec_collectd_send_config(False)
            if ret:
                logging.error(
                    "failed to send final config to esmon client on host [%s]",
                    esmon_client.ec_host.sh_hostname)
                return -1

            ret = esmon_client.ec_collectd_restart()
            if ret:
                logging.error("failed to start esmon client on host [%s]",
                              esmon_client.ec_host.sh_hostname)
                return -1

        if cleanup:
            ret = lustre_fs.lf_umount()
            if ret:
                logging.error("failed to umount file system [%s]",
                              lustre_fs.lf_fsname)
                return -1
    return 0
def esmon_io_loading(workspace, config, confpath):
    """
    Start the I/O

    Parses the SSH hosts and, for each configured Lustre file system, its
    OSTs and clients, then launches the I/O load daemon on that file system.

    workspace: unused here
    config: parsed config, read through esmon_common.config_value()
    confpath: path of the config file, only used in log messages

    Returns 0 on success, -1 on a config error, or the non-zero value
    returned by esmon_launch_ioload_daemon().
    """
    # pylint: disable=too-many-locals,unused-argument,too-many-return-statements
    # pylint: disable=too-many-branches,too-many-statements
    ssh_host_configs = esmon_common.config_value(
        config, esmon_common.CSTR_SSH_HOSTS)
    if ssh_host_configs is None:
        logging.error(
            "can NOT find [%s] in the config file, "
            "please correct file [%s]",
            esmon_common.CSTR_SSH_HOSTS, confpath)
        return -1

    hosts = {}
    for host_config in ssh_host_configs:
        # config_value() instead of plain indexing: a missing key must end
        # up in the error below, not in a KeyError
        host_id = esmon_common.config_value(host_config, "host_id")
        if host_id is None:
            logging.error(
                "can NOT find [host_id] in the config of a "
                "SSH host, please correct file [%s]",
                confpath)
            return -1

        hostname = esmon_common.config_value(host_config, "hostname")
        if hostname is None:
            logging.error(
                "can NOT find [hostname] in the config of SSH host "
                "with ID [%s], please correct file [%s]",
                host_id, confpath)
            return -1

        local = esmon_common.config_value(host_config,
                                          esmon_common.CSTR_LOCAL_HOST)
        if local is None:
            logging.debug(
                "can NOT find [%s] in the config of SSH host "
                "with ID [%s], use [false] as default value",
                esmon_common.CSTR_LOCAL_HOST, host_id)
            local = False

        # The "none" marker in the config is mapped to a real None
        mapping_dict = {esmon_common.ESMON_CONFIG_CSTR_NONE: None}
        ssh_identity_file = esmon_common.config_value(
            host_config, esmon_common.CSTR_SSH_IDENTITY_FILE,
            mapping_dict=mapping_dict)

        if host_id in hosts:
            logging.error(
                "multiple SSH hosts with the same ID [%s], please "
                "correct file [%s]", host_id, confpath)
            return -1
        hosts[host_id] = ssh_host.SSHHost(hostname, ssh_identity_file,
                                          local=local)

    # Parse the Lustre client configuration.
    lustre_configs = esmon_common.config_value(config,
                                               esmon_common.CSTR_LUSTRES)
    if lustre_configs is None:
        logging.error("no [%s] is configured, please correct file [%s]",
                      esmon_common.CSTR_LUSTRES, confpath)
        return -1

    for lustre_config in lustre_configs:
        # Parse general configs of Lustre file system
        fsname = esmon_common.config_value(lustre_config,
                                           esmon_common.CSTR_FSNAME)
        if fsname is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_FSNAME, confpath)
            return -1

        # The setting is optional and its value is not consumed by this
        # function, so a missing entry is only reported, never an error
        if esmon_common.config_value(
                lustre_config, esmon_common.CSTR_LAZY_PREPARE) is None:
            logging.info(
                "no [%s] is configured for fs [%s], using default value false",
                esmon_common.CSTR_LAZY_PREPARE, fsname)

        lustre_fs = lustre.LustreFilesystem(fsname)
        # Hosts taking part in this file system, keyed by host ID
        lustre_hosts = {}

        # Parse OST configs
        ost_configs = esmon_common.config_value(lustre_config,
                                                esmon_common.CSTR_OSTS)
        if ost_configs is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_OSTS, confpath)
            return -1

        for ost_config in ost_configs:
            ost_index = esmon_common.config_value(ost_config,
                                                  esmon_common.CSTR_INDEX)
            if ost_index is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_INDEX, confpath)
                return -1

            host_id = esmon_common.config_value(ost_config,
                                                esmon_common.CSTR_HOST_ID)
            if host_id is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_HOST_ID, confpath)
                return -1

            if host_id not in hosts:
                logging.error(
                    "no host with ID [%s] is configured in hosts, "
                    "please correct file [%s]",
                    host_id, confpath)
                return -1

            device = esmon_common.config_value(ost_config,
                                               esmon_common.CSTR_DEVICE)
            if device is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_DEVICE, confpath)
                return -1

            host = hosts[host_id]
            lustre_host = lustre.LustreServerHost(
                host.sh_hostname,
                identity_file=host.sh_identity_file,
                local=host.sh_local,
                host_id=host_id)
            if host_id not in lustre_hosts:
                lustre_hosts[host_id] = lustre_host

            mnt = "/mnt/%s_ost_%s" % (fsname, ost_index)
            lustre.LustreOST(lustre_fs, ost_index, lustre_host, device, mnt)

        # Parse client configs
        client_configs = esmon_common.config_value(lustre_config,
                                                   esmon_common.CSTR_CLIENTS)
        if client_configs is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_CLIENTS, confpath)
            return -1

        for client_config in client_configs:
            host_id = esmon_common.config_value(client_config,
                                                esmon_common.CSTR_HOST_ID)
            if host_id is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_HOST_ID, confpath)
                return -1

            if host_id not in hosts:
                logging.error(
                    "no host with [%s] is configured in hosts, "
                    "please correct file [%s]",
                    host_id, confpath)
                return -1

            mnt = esmon_common.config_value(client_config,
                                            esmon_common.CSTR_MNT)
            if mnt is None:
                logging.error(
                    "no [%s] is configured, please correct file [%s]",
                    esmon_common.CSTR_MNT, confpath)
                return -1

            host = hosts[host_id]
            lustre_host = lustre.LustreServerHost(
                host.sh_hostname,
                identity_file=host.sh_identity_file,
                local=host.sh_local,
                host_id=host_id)
            if host_id not in lustre_hosts:
                lustre_hosts[host_id] = lustre_host
            lustre.LustreClient(lustre_fs, host, mnt)

        # NOTE(review): launched once per file system - confirm that this
        # is the intended scope
        ret = esmon_launch_ioload_daemon(lustre_fs)
        if ret:
            return ret
    return 0
def esmon_vm_install(workspace, config, config_fpath):
    """
    Install the virtual machine templates and the virtual machines

    Parses the SSH hosts, installs each configured template (skipped when
    reinstall is false and the template domain already has a state), starts
    or clones each configured VM host from its template, then writes a hosts
    file under workspace, sends it to /etc on every VM host and empties
    /root/.ssh/known_hosts there.

    workspace: directory path; created on each template server host with
        "mkdir -p" and used locally to write the hosts file
    config: parsed config, read through esmon_common.config_value()
    config_fpath: path of the config file, only used in log messages

    Returns 0 on success, -1 on any failure.
    """
    # pylint: disable=too-many-return-statements,too-many-locals
    # pylint: disable=too-many-branches,too-many-statements
    ssh_host_configs = esmon_common.config_value(
        config, esmon_common.CSTR_SSH_HOSTS)
    if ssh_host_configs is None:
        logging.error("can NOT find [%s] in the config file, "
                      "please correct file [%s]",
                      esmon_common.CSTR_SSH_HOSTS, config_fpath)
        return -1

    hosts = {}
    for host_config in ssh_host_configs:
        # config_value() instead of plain indexing: a missing key must end
        # up in the error below, not in a KeyError
        host_id = esmon_common.config_value(host_config,
                                            esmon_common.CSTR_HOST_ID)
        if host_id is None:
            logging.error("can NOT find [%s] in the config of a "
                          "SSH host, please correct file [%s]",
                          esmon_common.CSTR_HOST_ID, config_fpath)
            return -1

        hostname = esmon_common.config_value(host_config,
                                             esmon_common.CSTR_HOSTNAME)
        if hostname is None:
            logging.error("can NOT find [%s] in the config of SSH host "
                          "with ID [%s], please correct file [%s]",
                          esmon_common.CSTR_HOSTNAME, host_id,
                          config_fpath)
            return -1

        local = esmon_common.config_value(host_config,
                                          esmon_common.CSTR_LOCAL_HOST)
        if local is None:
            logging.debug("can NOT find [%s] in the config of SSH host "
                          "with ID [%s], use [false] as default value",
                          esmon_common.CSTR_LOCAL_HOST, host_id)
            local = False

        # The "none" marker in the config is mapped to a real None
        mapping_dict = {esmon_common.ESMON_CONFIG_CSTR_NONE: None}
        ssh_identity_file = esmon_common.config_value(
            host_config, esmon_common.CSTR_SSH_IDENTITY_FILE,
            mapping_dict=mapping_dict)

        if host_id in hosts:
            logging.error("multiple SSH hosts with the same ID [%s], please "
                          "correct file [%s]", host_id, config_fpath)
            return -1
        hosts[host_id] = ssh_host.SSHHost(hostname, ssh_identity_file,
                                          local=local)

    template_configs = esmon_common.config_value(
        config, esmon_common.CSTR_TEMPLATES)
    if template_configs is None:
        logging.error("can NOT find [%s] in the config file, "
                      "please correct file [%s]",
                      esmon_common.CSTR_TEMPLATES, config_fpath)
        return -1

    # Templates keyed by their hostname, looked up by the VM hosts below
    templates = {}
    for template_config in template_configs:
        template_hostname = esmon_common.config_value(
            template_config, esmon_common.CSTR_HOSTNAME)
        if template_hostname is None:
            logging.error("can NOT find [%s] in the config of a "
                          "SSH host, please correct file [%s]",
                          esmon_common.CSTR_HOSTNAME, config_fpath)
            return -1

        reinstall = esmon_common.config_value(template_config,
                                              esmon_common.CSTR_REINSTALL)
        if reinstall is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_REINSTALL, config_fpath)
            return -1

        internet = esmon_common.config_value(template_config,
                                             esmon_common.CSTR_INTERNET)
        if internet is None:
            internet = False
            logging.debug("no [%s] is configured, will "
                          "not add internet support",
                          esmon_common.CSTR_INTERNET)

        ram_size = esmon_common.config_value(template_config,
                                             esmon_common.CSTR_RAM_SIZE)
        if ram_size is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_RAM_SIZE, config_fpath)
            return -1

        disk_sizes = esmon_common.config_value(template_config,
                                               esmon_common.CSTR_DISK_SIZES)
        if disk_sizes is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_DISK_SIZES, config_fpath)
            return -1

        network_configs = esmon_common.config_value(
            template_config, esmon_common.CSTR_NETWORK_CONFIGS)
        if network_configs is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_NETWORK_CONFIGS, config_fpath)
            return -1

        iso = esmon_common.config_value(template_config,
                                        esmon_common.CSTR_ISO)
        if iso is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_ISO, config_fpath)
            return -1

        distro = esmon_common.config_value(template_config,
                                           esmon_common.CSTR_DISTRO)
        if distro is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_DISTRO, config_fpath)
            return -1

        image_dir = esmon_common.config_value(template_config,
                                              esmon_common.CSTR_IMAGE_DIR)
        if image_dir is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_IMAGE_DIR, config_fpath)
            return -1

        server_host_id = esmon_common.config_value(
            template_config, esmon_common.CSTR_SERVER_HOST_ID)
        if server_host_id is None:
            logging.error("no [%s] is configured, please correct file [%s]",
                          esmon_common.CSTR_SERVER_HOST_ID, config_fpath)
            return -1

        if server_host_id not in hosts:
            logging.error("SSH host with ID [%s] is NOT configured in "
                          "[%s], please correct file [%s]",
                          server_host_id, esmon_common.CSTR_SSH_HOSTS,
                          config_fpath)
            return -1
        server_host = hosts[server_host_id]

        command = "mkdir -p %s" % workspace
        retval = server_host.sh_run(command)
        if retval.cr_exit_status:
            logging.error("failed to run command [%s] on host [%s], "
                          "ret = [%d], stdout = [%s], stderr = [%s]",
                          command, server_host.sh_hostname,
                          retval.cr_exit_status, retval.cr_stdout,
                          retval.cr_stderr)
            return -1

        template = VirtTemplate(server_host, iso, template_hostname,
                                internet, network_configs, image_dir,
                                distro, ram_size, disk_sizes)
        templates[template_hostname] = template

        # An existing template is only reinstalled when asked for; a
        # template without any domain state is always installed
        state = server_host.sh_virsh_dominfo_state(template_hostname)
        if not reinstall and state is not None:
            logging.debug("skipping reinstall of template [%s] according to config",
                          template_hostname)
            continue

        ret = vm_install(workspace, server_host, iso, template_hostname,
                         internet, network_configs, image_dir, distro,
                         ram_size, disk_sizes)
        if ret:
            logging.error("failed to create virtual machine template [%s]",
                          template_hostname)
            return -1

    vm_host_configs = esmon_common.config_value(config, "vm_hosts")
    if vm_host_configs is None:
        logging.error("no [vm_hosts] is configured, "
                      "please correct file [%s]", config_fpath)
        return -1

    vm_hosts = []
    # Loopback entries first; one line per VM host and per extra host is
    # appended below
    hosts_string = """127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
"""
    for vm_host_config in vm_host_configs:
        hostname = esmon_common.config_value(vm_host_config,
                                             esmon_common.CSTR_HOSTNAME)
        if hostname is None:
            logging.error("no [hostname] is configured for a vm_host, "
                          "please correct file [%s]", config_fpath)
            return -1

        # An empty list is rejected as well: the first IP is needed for
        # the hosts file, and failing here avoids touching the VM first
        ips = esmon_common.config_value(vm_host_config,
                                        esmon_common.CSTR_HOST_IPS)
        if not ips:
            logging.error("no [%s] is configured for a vm_host, "
                          "please correct file [%s]",
                          esmon_common.CSTR_HOST_IPS, config_fpath)
            return -1

        template_hostname = esmon_common.config_value(
            vm_host_config, esmon_common.CSTR_TEMPLATE_HOSTNAME)
        if template_hostname is None:
            logging.error("can NOT find [%s] in the config of a "
                          "SSH host, please correct file [%s]",
                          esmon_common.CSTR_TEMPLATE_HOSTNAME, config_fpath)
            return -1

        if template_hostname not in templates:
            logging.error("template with hostname [%s] is NOT configured in "
                          "[%s], please correct file [%s]",
                          template_hostname, esmon_common.CSTR_TEMPLATES,
                          config_fpath)
            return -1
        template = templates[template_hostname]

        # Reinstall defaults to False, but a VM without any domain state
        # has to be cloned regardless of the setting
        reinstall = esmon_common.config_value(vm_host_config, "reinstall")
        state = template.vt_server_host.sh_virsh_dominfo_state(hostname)
        if reinstall is None:
            reinstall = False
        if state is None:
            reinstall = True

        if not reinstall:
            ret = vm_start(workspace,
                           template.vt_server_host,
                           hostname,
                           template.vt_network_configs,
                           ips,
                           template.vt_template_hostname,
                           template.vt_image_dir,
                           template.vt_distro,
                           template.vt_internet,
                           len(template.vt_disk_sizes))
            if ret:
                logging.error("virtual machine [%s] can't be started",
                              hostname)
                return -1
        else:
            ret = vm_clone(workspace,
                           template.vt_server_host,
                           hostname,
                           template.vt_network_configs,
                           ips,
                           template.vt_template_hostname,
                           template.vt_image_dir,
                           template.vt_distro,
                           template.vt_internet,
                           len(template.vt_disk_sizes))
            if ret:
                logging.error("failed to create virtual machine [%s] based on "
                              "template [%s]", hostname,
                              template.vt_template_hostname)
                return -1

        host_ip = ips[0]
        hosts_string += ("%s %s\n" % (host_ip, hostname))
        vm_hosts.append(ssh_host.SSHHost(hostname))

    host_configs = esmon_common.config_value(config, esmon_common.CSTR_HOSTS)
    if host_configs is None:
        logging.error("can NOT find [%s] in the config file, "
                      "please correct file [%s]",
                      esmon_common.CSTR_HOSTS, config_fpath)
        return -1

    for host_config in host_configs:
        hostname = esmon_common.config_value(host_config,
                                             esmon_common.CSTR_HOSTNAME)
        if hostname is None:
            logging.error("can NOT find [%s] in the config file, "
                          "please correct file [%s]",
                          esmon_common.CSTR_HOSTNAME, config_fpath)
            return -1

        host_ip = esmon_common.config_value(host_config,
                                            esmon_common.CSTR_IP)
        if host_ip is None:
            logging.error("can NOT find [%s] in the config file, "
                          "please correct file [%s]",
                          esmon_common.CSTR_IP, config_fpath)
            return -1
        hosts_string += ("%s %s\n" % (host_ip, hostname))

    hosts_fpath = workspace + "/hosts"
    with open(hosts_fpath, "wt") as hosts_file:
        hosts_file.write(hosts_string)

    hosts_dest_dir = "/etc"
    for host in vm_hosts:
        ret = host.sh_send_file(hosts_fpath, hosts_dest_dir)
        if ret:
            # Report the real destination directory, not the workspace
            logging.error("failed to send hosts file [%s] on local host to "
                          "directory [%s] on host [%s]",
                          hosts_fpath, hosts_dest_dir, host.sh_hostname)
            return -1

        # Clear the known_hosts, otherwise the reinstalled hosts can't be
        # accessed by other hosts
        command = "> /root/.ssh/known_hosts"
        retval = host.sh_run(command)
        if retval.cr_exit_status:
            logging.error("failed to run command [%s] on host [%s], "
                          "ret = [%d], stdout = [%s], stderr = [%s]",
                          command, host.sh_hostname,
                          retval.cr_exit_status, retval.cr_stdout,
                          retval.cr_stderr)
            return -1
    return 0
def _esmon_build_run(host, command, **kwargs):
    """
    Run a shell command on a host and log an error if it fails

    Extra keyword arguments (e.g. timeout) are forwarded unchanged to
    host.sh_run(). Return the command result on success, or None if the
    command exited with a non-zero status.
    """
    retval = host.sh_run(command, **kwargs)
    if retval.cr_exit_status:
        logging.error(
            "failed to run command [%s] on host [%s], "
            "ret = [%d], stdout = [%s], stderr = [%s]",
            command, host.sh_hostname,
            retval.cr_exit_status, retval.cr_stdout,
            retval.cr_stderr)
        return None
    return retval


def _esmon_build_remove_unknown_files(host, directory, expected_fnames):
    """
    Remove every entry of a local directory that is not expected

    The directory is listed with os.listdir(), so it must be a path on the
    machine running this script; the removal itself is done through
    host.sh_run(). Return 0 on success, -1 if an expected entry is missing
    or a removal command fails.
    """
    expected = set(expected_fnames)
    existing_fnames = os.listdir(directory)

    # A missing expected entry means an earlier step did not produce what it
    # should have. Report it the same way as every other failure here instead
    # of letting list.remove() raise ValueError.
    missing_fnames = sorted(expected.difference(existing_fnames))
    if missing_fnames:
        for missing_fname in missing_fnames:
            logging.error(
                "can NOT find expected file [%s] under directory [%s]",
                missing_fname, directory)
        return -1

    for extra_fname in existing_fnames:
        if extra_fname in expected:
            continue
        logging.warning(
            "find unknown file [%s] under directory [%s], removing",
            extra_fname, directory)
        command = ("rm -fr %s/%s" % (directory, extra_fname))
        if _esmon_build_run(host, command) is None:
            return -1
    return 0


def esmon_do_build(current_dir, relative_workspace, config, config_fpath):
    """
    Build the ISO

    Steps, in order: parse the SSH hosts, pick the optional CentOS6 build
    host, check that the local host is RHEL7/CentOS7, clone Collectd, work
    out its version/release from the spec file, build the RPMs (on the
    CentOS6 host if configured, then locally), download the server RPMs and
    Grafana plugins, clean unknown files out of the cache directories, and
    finally run autogen/configure/make in current_dir.

    current_dir: directory in which autogen.sh/configure/make are run; the
        ISO cache ("../iso_cached_dir") and the Collectd checkout
        ("../collectd.git") are located relative to it.
    relative_workspace: workspace path appended to ESMON_BUILD_LOG_DIR (on
        the CentOS6 host) and to current_dir (on the local host).
    config: parsed configuration, read through esmon_common.config_value().
    config_fpath: path of the configuration file, used in log messages and
        passed to parse_host_configs().

    Return 0 on success, -1 on failure (the reason is logged).
    """
    # pylint: disable=too-many-locals,too-many-return-statements
    # pylint: disable=too-many-branches,too-many-statements
    hosts = {}
    if parse_host_configs(config, config_fpath, hosts):
        logging.error("failed to parse host configs")
        return -1

    # CentOS6 support is optional: without a [centos6_host] section only the
    # local (RHEL7/CentOS7) RPMs are built.
    centos6_host_config = esmon_common.config_value(config, "centos6_host")
    if centos6_host_config is None:
        logging.info(
            "can NOT find [centos6_host] in the config file [%s], "
            "diableing CentOS6 support", config_fpath)
        centos6_host = None
    else:
        centos6_host_id = esmon_common.config_value(centos6_host_config,
                                                    "host_id")
        if centos6_host_id is None:
            logging.error(
                "can NOT find [host_id] in the config of [centos6_host], "
                "please correct file [%s]", config_fpath)
            return -1

        if centos6_host_id not in hosts:
            logging.error(
                "SSH host with ID [%s] is NOT configured in "
                "[ssh_hosts], please correct file [%s]",
                centos6_host_id, config_fpath)
            return -1
        centos6_host = hosts[centos6_host_id]

    local_host = ssh_host.SSHHost("localhost", local=True)
    distro = local_host.sh_distro()
    if distro != ssh_host.DISTRO_RHEL7:
        logging.error("build can only be launched on RHEL7/CentOS7 host")
        return -1

    iso_cached_dir = current_dir + "/../iso_cached_dir"
    collectd_git_path = current_dir + "/../" + "collectd.git"
    rpm_dir = iso_cached_dir + "/RPMS"
    command = ("mkdir -p %s" % (rpm_dir))
    if _esmon_build_run(local_host, command) is None:
        return -1

    collectd_git_url = esmon_common.config_value(config, "collectd_git_url")
    if collectd_git_url is None:
        collectd_git_url = "https://github.com/DDNStorage/collectd.git"
        logging.info(
            "can NOT find [collectd_git_url] in the config, "
            "use default value [%s]", collectd_git_url)

    collectd_git_branch = esmon_common.config_value(config,
                                                    "collectd_git_branch")
    if collectd_git_branch is None:
        collectd_git_branch = "master-ddn"
        logging.info(
            "can NOT find [collectd_git_branch] in the config, "
            "use default value [%s]", collectd_git_branch)

    ret = esmon_common.clone_src_from_git(collectd_git_path, collectd_git_url,
                                          collectd_git_branch)
    if ret:
        logging.error(
            "failed to clone Collectd branch [%s] from [%s] to "
            "directory [%s]", collectd_git_branch, collectd_git_url,
            collectd_git_path)
        return -1

    # The short commit hash replaces the "%{?rev}" macro of the spec version.
    command = ("cd %s && git rev-parse --short HEAD" % collectd_git_path)
    retval = _esmon_build_run(local_host, command)
    if retval is None:
        return -1
    collectd_git_version = retval.cr_stdout.strip()

    command = (r"cd %s && grep Version contrib/redhat/collectd.spec | "
               r"grep -v \# | awk '{print $2}'" % collectd_git_path)
    retval = _esmon_build_run(local_host, command)
    if retval is None:
        return -1
    collectd_version_string = retval.cr_stdout.strip()
    collectd_version = collectd_version_string.replace('%{?rev}',
                                                       collectd_git_version)
    collectd_tarball_name = "collectd-" + collectd_version

    # The "%{?dist}" macro is stripped from the release string.
    command = (r"cd %s && grep Release contrib/redhat/collectd.spec | "
               r"grep -v \# | awk '{print $2}'" % collectd_git_path)
    retval = _esmon_build_run(local_host, command)
    if retval is None:
        return -1
    collectd_release_string = retval.cr_stdout.strip()
    collectd_release = collectd_release_string.replace('%{?dist}', '')
    collectd_version_release = collectd_version + "-" + collectd_release

    if centos6_host is not None:
        centos6_workspace = ESMON_BUILD_LOG_DIR + "/" + relative_workspace
        ret = host_build(centos6_workspace, centos6_host, local_host,
                         collectd_git_path, iso_cached_dir,
                         collectd_version_release, collectd_tarball_name,
                         ssh_host.DISTRO_RHEL6)
        if ret:
            logging.error("failed to prepare RPMs of CentOS6 on host [%s]",
                          centos6_host.sh_hostname)
            return -1

    # The build host of CentOS7 could potentially be another host, not local
    # host
    local_workspace = current_dir + "/" + relative_workspace
    ret = host_build(local_workspace, local_host, local_host,
                     collectd_git_path, iso_cached_dir,
                     collectd_version_release, collectd_tarball_name,
                     ssh_host.DISTRO_RHEL7)
    if ret:
        logging.error("failed to prepare RPMs of CentOS7 on local host")
        return -1

    local_distro_rpm_dir = ("%s/%s/%s" %
                            (iso_cached_dir, RPM_STRING,
                             ssh_host.DISTRO_RHEL7))
    local_server_rpm_dir = ("%s/%s" % (local_distro_rpm_dir, SERVER_STRING))

    command = ("mkdir -p %s" % local_server_rpm_dir)
    if _esmon_build_run(local_host, command) is None:
        return -1

    # File name -> download URL of the server-side RPMs shipped in the ISO.
    server_rpms = {}
    name = "grafana-6.0.2-1.x86_64.rpm"
    url = ("https://dl.grafana.com/oss/release/grafana-6.0.2-1.x86_64.rpm")
    server_rpms[name] = url

    name = "influxdb-1.7.4.x86_64.rpm"
    url = ("https://dl.influxdata.com/influxdb/releases/"
           "influxdb-1.7.4.x86_64.rpm")
    server_rpms[name] = url

    # .items() instead of .iteritems() so this also runs on Python 3.
    for name, url in server_rpms.items():
        fpath = ("%s/%s" % (local_server_rpm_dir, name))
        command = "test -e %s" % fpath
        # A non-zero exit status only means the file is not cached yet, so
        # this is not an error and is not routed through _esmon_build_run().
        retval = local_host.sh_run(command)
        if retval.cr_exit_status:
            logging.debug("file [%s] doesn't exist, downloading it", fpath)
            # NOTE(review): --no-check-certificate disables TLS verification
            # of the downloaded RPMs; confirm this is still required.
            command = ("cd %s && wget --no-check-certificate %s" %
                       (local_server_rpm_dir, url))
            if _esmon_build_run(local_host, command, timeout=3600) is None:
                return -1

    ret = _esmon_build_remove_unknown_files(local_host, local_server_rpm_dir,
                                            list(server_rpms))
    if ret:
        return -1

    ret = esmon_download_grafana_plugins(local_host, iso_cached_dir)
    if ret:
        logging.error("failed to download Grafana plugins")
        return -1

    # Only the Grafana plugin directories and the RPM directory are allowed
    # to stay in the ISO cache.
    dependent_fnames = list(esmon_common.GRAFANA_PLUGIN_GITS)
    dependent_fnames.append("RPMS")
    ret = _esmon_build_remove_unknown_files(local_host, iso_cached_dir,
                                            dependent_fnames)
    if ret:
        return -1

    command = ("cd %s && rm esmon-*.tar.bz2 esmon-*.tar.gz -f && "
               "sh autogen.sh && "
               "./configure --with-cached-iso=%s && "
               "make" %
               (current_dir, iso_cached_dir))
    if _esmon_build_run(local_host, command) is None:
        return -1

    # The remote workspace is only cleaned up after a successful build.
    if centos6_host is not None:
        command = ("rm -fr %s" % (centos6_workspace))
        if _esmon_build_run(centos6_host, command) is None:
            return -1
    return 0