def stop(service_name):
    """
    Stop a service via init script or systemd.

    'service_name' is used as the base of the keys in the core.config and
    core.state dictionaries.

    If we started the service, the service is stopped by doing
    "service service_name stop" or "systemctl stop service_name".

    Globals used:
    core.state[service_name.started-service] is used to determine if we started
    the service. After shutdown, this is set to False.
    """
    if not core.state.get(service_name + '.started-service'):
        core.skip('did not start service ' + service_name)
        return

    if core.el_release() >= 7:
        command = ('systemctl', 'stop', service_name)
    else:
        command = ('service', service_name, 'stop')
    core.check_system(command, 'Stop ' + service_name + ' service')
    core.state[service_name + '.started-service'] = False
def test_04_xrootd_fuse(self):
    # This tests xrootd-fuse using a mount in /mnt
    core.skip_ok_unless_installed('xrootd', 'xrootd-client', by_dependency=True)
    self.skip_ok_unless(os.path.exists("/mnt"), "/mnt did not exist")
    self.skip_ok_if(core.config['xrootd.gsi'] == "ON", 'fuse incompatible with GSI')

    if not os.path.exists(TestXrootd.__fuse_path):
        os.mkdir(TestXrootd.__fuse_path)
    hostname = socket.getfqdn()

    #command = ('xrootdfs', TestXrootd.__fuse_path, '-o', 'rdr=xroot://localhost:1094//tmp', '-o', 'uid=xrootd')
    command = ('mount', '-t', 'fuse', '-o', 'rdr=xroot://localhost:1094//tmp,uid=xrootd',
               'xrootdfs', TestXrootd.__fuse_path)
    command_str = ' '.join(command)

    # For some reason, subprocess hangs on fuse processes, so use os.system
    #status, stdout, stderr = core.system(command_str, shell=True)
    os.system(command_str)

    # Copy a file in and see if it made it into the fuse mount
    xrootd_url = 'root://%s/%s/copied_file.txt' % (hostname, "/tmp")
    command = ('xrdcp', '--debug', '3', TestXrootd.__data_path, xrootd_url)
    core.system(command, user=True)

    command = ('ls', "/tmp/copied_file.txt")
    core.check_system(command, "Checking file is copied to xrootd fuse mount correctly", user=True)

    command = ('umount', TestXrootd.__fuse_path)
    core.system(command)
    os.rmdir(TestXrootd.__fuse_path)
    files.remove("/tmp/copied_file.txt")
def add_user(vo, usercert, use_voms_admin=False):
    """Add the user identified by the given cert to the specified VO.

    May use voms-admin or direct MySQL statements.

    The CA cert that issued the user cert must already be in the database's
    'ca' table - this happens automatically if the CA cert is in
    /etc/grid-security/certificates when the VOMS database is created.
    """
    usercert_dn, usercert_issuer = cagen.certificate_info(usercert)

    if use_voms_admin:
        hostname = socket.getfqdn()
        command = ('voms-admin', '--vo', core.config['voms.vo'], '--host', hostname, '--nousercert',
                   'create-user', usercert_dn, usercert_issuer, 'OSG Test User', 'root@localhost')
        core.check_system(command, 'Add VO user')
    else:
        dbname = 'voms_' + vo

        # Find the index in the "ca" table ("cid") for the OSG Test CA that gets created by voms_install_db.
        output, _, _ = mysql.check_execute(
            r'''SELECT cid FROM ca WHERE ca='%(usercert_issuer)s';''' % locals(),
            'Get ID of user cert issuer from database', dbname)
        output = output.strip()
        assert output, "User cert issuer not found in database"
        ca = int(output)

        mysql.check_execute(r'''
INSERT INTO `usr` VALUES (1,'%(usercert_dn)s',%(ca)d,NULL,'root@localhost',NULL);
INSERT INTO `m` VALUES (1,1,1,NULL,NULL);''' % locals(),
                            'Add VO user', dbname)
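# A minimal usage sketch for add_user() above; the VO name and cert path are
# illustrative assumptions, not taken from this file. The voms-admin path needs
# the VOMS Admin webapp running, while the default MySQL path only needs the
# VOMS database for the VO to exist.
add_user('osgtestvo', '/usr/share/osg-test/usercert.pem', use_voms_admin=False)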
def test_02_setup_selinux_port(self):
    if not core.state['selinux.mode']:
        self.skip_ok('SELinux disabled')
    core.skip_ok_unless_installed('policycoreutils-python')
    port = core.config['gsisshd.port']
    core.check_system(['semanage', 'port', '--add', '-t', 'ssh_port_t', '--proto', 'tcp', port],
                      message="Allow [gsi]sshd to use port %s" % port)
def start(service_name):
    """
    Start a service via init script or systemd.

    'service_name' is used as the base of the keys in the core.config and
    core.state dictionaries.

    The service is started by doing "service service_name start" or
    "systemctl start service_name".

    The service is not started up if core.state[service_name.started-service]
    is True.

    The following globals are set:
    core.state[service_name.started-service] is set to True after a successful
    startup.
    """
    if core.state.get(service_name + '.started-service'):
        core.skip('service ' + service_name + ' already running (flagged as started)')
        return

    if core.el_release() >= 7:
        command = ('systemctl', 'start', service_name)
    else:
        command = ('service', service_name, 'start')
    core.check_system(command, 'Start ' + service_name + ' service')
    core.state[service_name + '.started-service'] = True
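# A minimal sketch of how start() and stop() above pair up across test modules,
# assuming these functions live in the osg-test service library (the import
# path, test names, and service name are assumptions). start() records the
# started-service flag that stop() later consults, so a stop without a prior
# start skips cleanly.
from osgtest.library import service

def test_01_start_myservice(self):
    service.start('myservice')   # sets core.state['myservice.started-service'] = True

def test_99_stop_myservice(self):
    service.stop('myservice')    # skips unless the flag above was set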
def test_02_start_slurmdbd(self):
    core.state['slurmdbd.started-service'] = False
    self.slurm_reqs()
    core.skip_ok_unless_installed('slurm-slurmdbd')
    self.skip_bad_unless(mysql.is_running(), 'slurmdbd requires mysql')

    core.config['slurmdbd.config'] = '/etc/slurm/slurmdbd.conf'
    core.config['slurmdbd.user'] = "******"
    core.config['slurmdbd.name'] = "osg_test_slurmdb"

    mysql.check_execute("create database %s; " % core.config['slurmdbd.name'], 'create slurmdb')
    mysql.check_execute("create user %s; " % core.config['slurmdbd.user'], 'add slurmdb user')
    mysql.check_execute("grant usage on *.* to %s; " % core.config['slurmdbd.user'], 'slurmdb user access')
    mysql.check_execute("grant all privileges on %s.* to %s identified by '%s'; " %
                        (core.config['slurmdbd.name'], core.config['slurmdbd.user'], core.options.password),
                        'slurmdb user permissions')
    mysql.check_execute("flush privileges;", 'reload privileges')

    db_config_vals = {'name': core.config['slurmdbd.name'],
                      'user': core.config['slurmdbd.user'].split('\'')[1],
                      'pass': core.options.password}
    files.write(core.config['slurmdbd.config'], SLURMDBD_CONFIG % db_config_vals, owner='slurm', chmod=0o644)
    service.check_start('slurmdbd')

    # Adding the cluster to the database
    command = ('sacctmgr', '-i', 'add', 'cluster', CLUSTER_NAME)
    core.check_system(command, 'add slurm cluster')
def test_05_start_pbs(self):
    core.state['pbs_server.started-service'] = False
    core.state['torque.nodes-up'] = False
    core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
    self.skip_bad_unless(service.is_running('trqauthd'), 'pbs_server requires trqauthd')
    self.skip_ok_if(service.is_running('pbs_server'), 'pbs server already running')

    server_log = '/var/log/torque/server_logs/' + date.today().strftime('%Y%m%d')
    try:
        server_log_stat = os.stat(server_log)
    except OSError:
        server_log_stat = None

    service.check_start('pbs_server')

    # Wait until the server is up before writing the rest of the config
    core.monitor_file(server_log, server_log_stat, '.*Server Ready.*', 60.0)
    core.check_system("echo '%s' | qmgr %s" % (self.pbs_config, core.get_hostname()),
                      "Configuring pbs server", shell=True)

    # Wait up to 10 minutes for the server to recognize the node
    start_time = time.time()
    while (time.time() - start_time) < 600:
        command = ('/usr/bin/qnodes', '-s', core.get_hostname())
        stdout, _, fail = core.check_system(command, 'Get pbs node info')
        self.assert_(stdout.find('error') == -1, fail)
        if stdout.find('state = free') != -1:
            core.state['torque.nodes-up'] = True
            break
    if not core.state['torque.nodes-up']:
        self.fail('PBS nodes not coming up')
def test_03_start_slurm(self):
    core.config['slurm.service-name'] = 'slurm'
    if core.el_release() == 7:
        core.config['slurm.service-name'] += 'd'
    core.config['slurm.ctld-service-name'] = 'slurmctld'
    core.state['%s.started-service' % core.config['slurm.service-name']] = False
    self.slurm_reqs()
    self.skip_ok_if(service.is_running(core.config['slurm.service-name']), 'slurm already running')

    stat = core.get_stat(CTLD_LOG)

    if core.el_release() == 7:
        # slurmctld is handled by /etc/init.d/slurm on EL6
        command = ['slurmctld']
        core.check_system(command, 'enable slurmctld')
        service.check_start(core.config['slurm.service-name'])
        service.check_start(core.config['slurm.ctld-service-name'])
    else:
        service.check_start(core.config['slurm.service-name'])

    core.monitor_file(CTLD_LOG, stat,
                      'slurm_rpc_node_registration complete for %s' % SHORT_HOSTNAME, 60.0)
    log_stat = core.get_stat(SLURM_LOG)
    core.monitor_file(SLURM_LOG, log_stat, 'slurmd started', 60.0)
    command = ['scontrol', 'update', 'nodename=%s' % SHORT_HOSTNAME, 'state=idle']
    core.check_system(command, 'enable slurm node')
def test_01_stop_cvmfs(self):
    core.skip_ok_unless_installed('cvmfs')
    self.skip_ok_if(core.state['cvmfs.started-server'] == False, 'did not start server')

    try:
        for temp_dir in core.config['cvmfs.debug-dirs']:
            command = ('umount', temp_dir)
            core.check_system(command, 'Manual cvmfs unmount failed')
            files.remove(temp_dir, force=True)
    except KeyError:
        pass  # tempdir was never created

    if core.state['cvmfs.version'] < ('2', '1'):
        command = ('service', 'cvmfs', 'stop')
    else:
        command = ('cvmfs_config', 'umount')
    stdout, _, fail = core.check_system(command, 'Stop Cvmfs server')
    self.assert_(stdout.find('FAILED') == -1, fail)

    # Restart autofs to bring network filesystems back (specifically
    # homedirs on el5 fermicloud vms)
    if core.state['cvmfs.version'] >= ('2', '1'):
        stdout, _, fail = core.check_system(('service', 'autofs', 'restart'), 'Restart autofs')
        self.assert_(stdout.find('FAILED') == -1, fail)

    files.restore("/etc/fuse.conf", "cvmfs")
    files.restore("/etc/auto.master", "cvmfs")
    files.restore("/etc/cvmfs/default.local", "cvmfs")
    files.restore("/etc/cvmfs/domain.d/cern.ch.local", "cvmfs")
def test_01_gratia_admin_webpage(self):
    core.skip_ok_unless_installed('gratia-service')
    self.skip_bad_unless(core.state['tomcat.started'], 'Tomcat not started')
    host = core.get_hostname()
    admin_webpage = 'http://' + host + ':8880/gratia-administration/status.html?wantDetails=0'
    command = ('curl', admin_webpage)
    core.check_system(command, 'Unable to launch gratia admin webpage')
def debug_cvmfs(self, repo):
    temp_dir = tempfile.mkdtemp()
    core.config['cvmfs.debug-dirs'].append(temp_dir)
    command = ('mount', '-t', 'cvmfs', repo, temp_dir)
    core.check_system(command, 'Manual cvmfs mount failed')
    # If manual mount works, autofs is broken
    self.fail("Autofs failed to mount /cvmfs/%s" % repo)
def test_03_update_osg_release(self):
    core.state['install.release-updated'] = False
    if not core.options.updaterelease:
        return

    self.skip_bad_unless(core.state['install.success'], 'Install did not succeed')

    command = ['rpm', '-e', 'osg-release']
    core.check_system(command, 'Erase osg-release')

    self.assert_(re.match(r'\d+\.\d+', core.options.updaterelease), "Unrecognized updaterelease format")
    rpm_url = 'https://repo.opensciencegrid.org/osg/' + core.options.updaterelease + '/osg-' + \
              core.options.updaterelease + '-el' + str(core.el_release()) + '-release-latest.rpm'
    command = ['rpm', '-Uvh', rpm_url]
    core.check_system(command, 'Update osg-release')

    core.config['yum.clean_repos'] = ['osg'] + core.options.updaterepos
    yum.clean(*core.config['yum.clean_repos'])

    # If update repos weren't specified, just use osg-release
    if not core.options.updaterepos:
        core.options.updaterepos = ['osg']

    core.state['install.release-updated'] = True
    core.osg_release(update_state=True)
def test_02_open_access(self):
    core.skip_ok_unless_installed('voms-admin-server', 'voms-admin-client')
    self.skip_ok_unless(core.state['voms.started-webapp'], 'VOMS Admin webapp not started')
    command = ('voms-admin', '--nousercert', '--vo', core.config['voms.vo'], 'add-ACL-entry',
               '/' + core.config['voms.vo'], 'ANYONE', 'VOMS_CA', 'CONTAINER_READ,MEMBERSHIP_READ', 'true')
    core.check_system(command, 'Add VOMS Admin ACL entry')
def test_04_copy_server_to_local_uberftp_parallel(self):
    core.skip_ok_unless_installed(*self.required_rpms)
    self.skip_bad_unless(core.state['gridftp.running-server'] is True, 'GridFTP not running')
    hostname = socket.getfqdn()

    temp_dir_source = tempfile.mkdtemp()
    temp_dir_dest = tempfile.mkdtemp()
    os.chmod(temp_dir_source, 0o777)
    os.chmod(temp_dir_dest, 0o777)
    filename = 'testfile_10MB'
    full_path = os.path.join(temp_dir_source, filename)
    command = ('dd', 'if=/dev/zero', 'of=' + full_path, 'bs=10485760', 'count=1')
    core.check_system(command, 'Create test file with dd', user=True)

    ftp_cmd = 'cd %s; lcd %s; get %s' % (temp_dir_source, temp_dir_dest, filename)
    command = ('uberftp', '-parallel', '10', hostname, ftp_cmd)
    status, stdout, stderr = core.system(command, True)
    fail = core.diagnose('UberFTP parallel copy, server to local', command, status, stdout, stderr)

    file_copied = os.path.exists(os.path.join(temp_dir_dest, filename))
    shutil.rmtree(temp_dir_source)
    shutil.rmtree(temp_dir_dest)
    self.assertEqual(status, 0, fail)
    self.assert_(file_copied, 'Copied file missing')
def test_02_osg_version(self):
    core.skip_ok_unless_installed('osg-version')
    command = ('osg-version',)

    # First we verify that osg-version runs
    stdout = core.check_system(command, 'osg-version')[0]

    # Then we pull out the version number from the output
    version_pattern = re.compile(r'(\d+\.\d+\.\d+)')
    matches = version_pattern.search(stdout)

    # Is there a version number?
    self.assert_(matches is not None)
    osg_version = matches.group(1)

    # Get the version number from the RPM
    command = ('rpm', '-q', 'osg-version')
    stdout = core.check_system(command, "osg-version RPM version")[0]
    matches = version_pattern.search(stdout)
    self.assert_(matches is not None)

    # Verify that the versions match
    osg_version_rpm_version = matches.group(1)
    self.assert_(osg_version == osg_version_rpm_version)
def test_03_selinux(self):
    if not core.options.selinux:
        return
    self.skip_bad_unless(core.rpm_is_installed('libselinux-utils'), 'missing SELinux utils')
    if core.state['selinux.mode'] == 'permissive':
        core.check_system(('setenforce', 'Permissive'), 'set selinux mode to permissive')
def test_07_remove_test_user(self):
    if not core.state['general.user_added']:
        core.log_message('Did not add user')
        return

    username = core.options.username
    password_entry = pwd.getpwnam(username)
    globus_dir = os.path.join(password_entry.pw_dir, '.globus')

    # Remove certs in case userdel fails
    if core.state['general.user_cert_created']:
        files.remove(os.path.join(globus_dir, 'usercert.pem'))
        files.remove(os.path.join(globus_dir, 'userkey.pem'))

    # Get list of PIDs owned by the test user
    command = ('ps', '-U', username, '-u', username, '-o', 'pid=')
    _, output, _ = core.system(command)

    # Take no prisoners
    for pid in output.splitlines():
        try:
            os.kill(int(pid), signal.SIGKILL)
        except OSError:
            continue

    command = ('userdel', username)
    core.check_system(command, "Remove user '%s'" % (username))

    files.remove(os.path.join('/var/spool/mail', username))
    shutil.rmtree(password_entry.pw_dir)
def test_03_grid_proxy_init(self):
    core.state['proxy.created'] = False
    core.skip_ok_unless_installed('globus-proxy-utils')
    command = ('grid-proxy-init', '-debug')
    password = core.options.password + '\n'
    core.check_system(command, 'Normal grid-proxy-init', user=True, stdin=password)
    core.state['proxy.created'] = True
def test_02_unset_selinux_port(self):
    if not core.state['selinux.mode']:
        self.skip_ok('no selinux')
    core.skip_ok_unless_installed('policycoreutils-python')
    port = core.config['gsisshd.port']
    core.check_system(['semanage', 'port', '--delete', '--proto', 'tcp', port],
                      message="Forbid [gsi]sshd to use port %s" % port)
def test_03_configure_cksums_dir(self):
    core.skip_ok_unless_installed('gridftp-hdfs')
    checksums_dir = '/cksums'
    command = ('mkdir', '-p', checksums_dir)
    core.check_system(command, 'Creating gridftp hadoop checksums dir')
    command = ('chmod', 'a+w', checksums_dir)
    core.check_system(command, 'Making checksums dir writable')
def test_04_copy_server_to_local_gfal2_gftp_util(self):
    core.skip_ok_unless_installed('globus-gridftp-server-progs', 'gfal2-plugin-gridftp')
    self.skip_ok_unless(core.state['gridftp.running-server'], 'gridftp server not running')
    self.setup_temp_paths()
    command = ('gfal-copy', '-v', '-f', self.get_gftp_url_base() + TestGFAL2Util.__data_path,
               'file://' + TestGFAL2Util.__local_path)
    core.check_system(command, "gfal2-util copy from GridFTP URL to local", user='******')
    file_copied = os.path.exists(TestGFAL2Util.__local_path)
    self.assert_(file_copied, 'Copied file missing')
def test_14_restore_etcgratia_collector_or_services(self):
    core.skip_ok_unless_installed('gratia-service')
    if 'gratia.etcgratia_collector_or_services-backedup' in core.state:
        gratia_directory_to_preserve = core.state['gratia.etcgratia_collector_or_services-backedup']
        backup_path = gratia_directory_to_preserve + '_production'
        files.remove(gratia_directory_to_preserve, True)
        command = ("mv " + backup_path + " " + gratia_directory_to_preserve,)
        core.check_system(command, 'Could not restore ' + gratia_directory_to_preserve, shell=True)
def test_01_fix_symlinks(self):
    if core.rpm_is_installed('jdk') and \
            (java.is_openjdk_installed() or java.is_openjdk_devel_installed()):
        # We regenerate these symlinks via alternatives so it's unnecessary to back them up
        command = ('rm', '-f', '/usr/bin/java', '/usr/bin/javac', '/usr/bin/javadoc', '/usr/bin/jar')
        core.check_system(command, 'Remove old symlinks')
        command = ['yum', 'reinstall', '-y', java.JAVA_RPM, java.JAVAC_RPM]
        yum.retry_command(command)
def test_05_my_proxy_retrieval(self):
    core.skip_ok_unless_installed('myproxy', 'myproxy-server')
    self.skip_bad_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')
    self.skip_bad_unless(core.state['myproxy.created'], 'MyProxy creation failed')
    command = ('myproxy-logon', '--verbose', '-s', 'localhost', '-l', core.options.username)
    password = core.config['myproxy.password'] + '\n'
    core.check_system(command, 'myproxy-logon retrieval', user=True, stdin=password)
def test_04_backup_etcgratia_collector_or_services(self):
    core.skip_ok_unless_installed('gratia-service')
    gratia_directory_to_preserve = core.config['gratia.config.dir'] + "/" + core.config['gratia.directory']
    if os.path.exists(gratia_directory_to_preserve):
        backup_path = gratia_directory_to_preserve + '_production'
        command = ("cp -pr " + gratia_directory_to_preserve + " " + backup_path,)
        core.check_system(command, 'Could not backup ' + gratia_directory_to_preserve, shell=True)
        core.state['gratia.etcgratia_collector_or_services-backedup'] = gratia_directory_to_preserve
def test_02_remove_vo_webapp(self):
    core.skip_ok_unless_installed('voms-admin-server')
    self.skip_ok_unless(core.state['voms.installed-vo-webapp'], 'did not start webapp')
    command = ('service', 'voms-admin', 'stop')
    core.check_system(command, 'Uninstall VOMS Admin webapp(s)')
    self.assert_(not os.path.exists(core.config['voms.vo-webapp']),
                 'VOMS Admin VO context file still exists')
def test_02_reconfigure_condor(self):
    core.skip_ok_unless_installed('condor', 'htcondor-ce', 'htcondor-ce-client')
    self.skip_bad_unless(core.state['condor.running-service'], 'Condor not running')
    command = ('condor_reconfig', '-debug')
    core.check_system(command, 'Reconfigure Condor')
    self.assert_(condor.is_running(), 'Condor not running after reconfig')
def test_05_copy_local_to_server_gfal2_util(self):
    core.skip_ok_unless_installed('globus-gridftp-server-progs', 'gfal2-plugin-gridftp')
    self.skip_ok_unless(core.state['gridftp.running-server'], 'gridftp server not running')
    file_not_created = not os.path.exists(TestGFAL2Util.__remote_path)
    self.assert_(file_not_created, 'file to be copied already exists at the destination')
    command = ('gfal-copy', '-v', 'file://' + TestGFAL2Util.__local_path,
               self.get_gftp_url_base() + TestGFAL2Util.__remote_path)
    core.check_system(command, "gfal2-util copy from local to GridFTP URL", user='******')
    file_copied = os.path.exists(TestGFAL2Util.__remote_path)
    self.assert_(file_copied, 'Copied file missing')
def test_01_open_access(self):
    core.state['voms-admin.read-members'] = False
    core.skip_ok_unless_installed('voms-admin-server', 'voms-admin-client')
    self.skip_bad_unless(core.state['tomcat.started'], 'Tomcat not started')
    command = ('voms-admin', '--nousercert', '--vo', core.config['voms.vo'], 'add-ACL-entry',
               '/' + core.config['voms.vo'], 'ANYONE', 'VOMS_CA', 'CONTAINER_READ,MEMBERSHIP_READ', 'true')
    core.check_system(command, 'Add VOMS Admin ACL entry')
    core.state['voms-admin.read-members'] = True
def test_06_remove_server_file_gfal2_util_gftp(self):
    core.skip_ok_unless_installed('globus-gridftp-server-progs', 'gfal2-plugin-gridftp')
    self.skip_ok_unless(core.state['gridftp.running-server'], 'gridftp server not running')
    command = ('gfal-rm', '-v', self.get_gftp_url_base() + TestGFAL2Util.__remote_path)
    core.check_system(command, "gfal2-util remove, URL file", user='******')
    file_removed = not os.path.exists(TestGFAL2Util.__remote_path)
    self.assert_(file_removed, 'Copied file still exists')
    files.remove(TestGFAL2Util.__remote_path)
    files.remove(TestGFAL2Util.__local_path)
def test_01_yum_repositories(self):
    pre = ('rpm', '--verify', '--nomd5', '--nosize', '--nomtime')
    core.check_system(pre + ('epel-release',), 'Verify epel-release')
    # If osg-release isn't installed, try osg-release-itb
    try:
        core.check_system(pre + ('osg-release',), 'Verify osg-release')
    except AssertionError:
        core.check_system(pre + ('osg-release-itb',), 'Verify osg-release + osg-release-itb')
    core.config['install.original-release-ver'] = core.osg_release()
def test_01_stop_xrootd(self):
    if not core.rpm_is_installed('cvmfs'):
        core.skip('not installed')
        return
    if core.state['cvmfs.started-server'] == False:
        core.skip('did not start server')
        return

    command = ('service', 'cvmfs', 'stop')
    stdout, _, fail = core.check_system(command, 'Stop Cvmfs server')
    self.assert_(stdout.find('FAILED') == -1, fail)

    files.restore("/etc/fuse.conf", "root")
    files.restore("/etc/auto.master", "root")
    files.restore("/etc/cvmfs/default.local", "root")
    files.restore("/etc/cvmfs/domain.d/cern.ch.local", "root")
def test_02_start_seg(self):
    core.state['globus.started-seg'] = False
    core.config['globus.seg-lockfile'] = '/var/lock/subsys/globus-scheduler-event-generator'
    if not core.rpm_is_installed('globus-scheduler-event-generator-progs'):
        return
    if os.path.exists(core.config['globus.seg-lockfile']):
        core.skip('SEG apparently running')
        return

    command = ('service', 'globus-scheduler-event-generator', 'start')
    stdout, _, fail = core.check_system(command, 'Start Globus SEG')
    self.assert_(stdout.find('FAILED') == -1, fail)
    self.assert_(os.path.exists(core.config['globus.seg-lockfile']),
                 'Globus SEG run lock file missing')
    core.state['globus.started-seg'] = True
def test_100_html_consumer(self):
    # This test must come after some of the metric tests so that we have
    # some job records to use to create an index.html
    core.skip_ok_unless_installed('rsv')

    index_file = "/usr/share/rsv/www/index.html"

    # We are going to make sure the html-consumer runs, and that the index
    # file is updated.
    old_mtime = os.stat(index_file).st_mtime
    stdout = core.check_system("su -c '/usr/libexec/rsv/consumers/html-consumer' rsv",
                               "run html-consumer", shell=True)[0]
    self.assert_('html-consumer initializing' in stdout)
    new_mtime = os.stat(index_file).st_mtime
    self.assert_(old_mtime != new_mtime)
def test_09_start_voms(self):
    core.state['voms.started-server'] = False
    if not core.rpm_is_installed('voms-server'):
        core.skip('not installed')
        return
    if os.path.exists(core.config['voms.lock-file']):
        core.skip('apparently running')
        return

    command = ('service', 'voms', 'start')
    stdout, _, fail = core.check_system(command, 'Start VOMS service')
    self.assertEqual(stdout.find('FAILED'), -1, fail)
    self.assert_(os.path.exists(core.config['voms.lock-file']),
                 'VOMS server PID file is missing')
    core.state['voms.started-server'] = True
def test_01_start_gridftp(self):
    core.config['gridftp.pid-file'] = '/var/run/globus-gridftp-server.pid'
    core.state['gridftp.started-server'] = False

    if not core.rpm_is_installed('globus-gridftp-server-progs'):
        core.skip('not installed')
        return
    if os.path.exists(core.config['gridftp.pid-file']):
        core.skip('apparently running')
        return

    command = ('service', 'globus-gridftp-server', 'start')
    stdout, _, fail = core.check_system(command, 'Start GridFTP server')
    self.assert_(stdout.find('FAILED') == -1, fail)
    self.assert_(os.path.exists(core.config['gridftp.pid-file']),
                 'GridFTP server PID file missing')
    core.state['gridftp.started-server'] = True
def test_01_stop_xrootd(self):
    if not core.rpm_is_installed('xrootd-server'):
        core.skip('not installed')
        return
    if core.state['xrootd.started-server'] == False:
        core.skip('did not start server')
        return

    command = ('service', 'xrootd', 'stop')
    stdout, _, fail = core.check_system(command, 'Stop Xrootd server')
    self.assert_(stdout.find('FAILED') == -1, fail)
    self.assert_(not os.path.exists(core.config['xrootd.pid-file']),
                 'Xrootd server PID file still present')

    if core.config['xrootd.gsi'] == "ON":
        files.restore('/etc/xrootd/xrootd-clustered.cfg', "xrootd")
        files.restore('/etc/xrootd/auth_file', "xrootd")
        files.restore('/etc/grid-security/xrd/xrdmapfile', "xrootd")
def test_03_start_pbs_sched(self):
    core.config['torque.sched-lockfile'] = '/var/lock/subsys/pbs_sched'
    core.state['torque.pbs-sched-running'] = False

    if core.missing_rpm(*self.required_rpms):
        return
    if os.path.exists(core.config['torque.sched-lockfile']):
        core.skip('pbs scheduler apparently running')
        return

    command = ('service', 'pbs_sched', 'start')
    stdout, _, fail = core.check_system(command, 'Start pbs scheduler daemon')
    self.assert_(stdout.find('error') == -1, fail)
    self.assert_(os.path.exists(core.config['torque.sched-lockfile']),
                 'pbs sched run lock file missing')
    core.state['torque.pbs-sched-running'] = True
def config_val(attr):
    """Query HTCondor for the value of a configuration variable using the
    python bindings if available, condor_config_val otherwise
    """
    try:
        import htcondor
        # Necessary for checking config between different flavors of HTCondor
        htcondor.reload_config()
        try:
            val = htcondor.param[attr]
        except KeyError:
            # attr is undefined
            val = None
    except:
        out, _, _ = core.check_system(('condor_config_val', attr),
                                      'Failed to query for config variable: %s' % attr)
        val = out.strip()
    return val
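# A minimal usage sketch for config_val() above (illustrative only: the test
# name and the COLLECTOR_HOST variable are assumptions, not taken from this file).
def test_00_collector_host(self):
    collector_host = config_val('COLLECTOR_HOST')
    # The python-bindings path returns None for an undefined variable, so guard on it.
    self.assert_(collector_host is not None, 'COLLECTOR_HOST is not defined')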
def run_blahp_trace(self, lrms):
    """Run condor_ce_trace() against a non-HTCondor backend and verify the cache"""
    lrms_cache_prefix = {'pbs': 'qstat', 'slurm': 'slurm'}

    cwd = os.getcwd()
    os.chdir('/tmp')
    command = ('condor_ce_trace', '-a osgTestBatchSystem = %s' % lrms.lower(), '--debug',
               core.get_hostname())
    trace_out, _, _ = core.check_system(command, 'ce trace against %s' % lrms.lower(), user=True)

    try:
        backend_jobid = re.search(r'%s_JOBID=(\d+)' % lrms.upper(), trace_out).group(1)
    except AttributeError:
        # failed to find backend job ID
        self.fail('did not run against %s' % lrms.upper())

    cache_file = '/var/tmp/%s_cache_%s/blahp_results_cache' % (lrms_cache_prefix[lrms.lower()],
                                                               core.options.username)
    with open(cache_file, 'r') as handle:
        cache = handle.read()

    # Verify backend job ID in cache for multiple formats between the different
    # versions of the blahp. For blahp-1.18.16.bosco-1.osg32:
    #
    # 2: [BatchJobId="2"; WorkerNode="fermicloud171.fnal.gov-0"; JobStatus=4; ExitCode= 0; ]\n
    #
    # For blahp-1.18.25.bosco-1.osg33:
    #
    # 5347907 "(dp0
    # S'BatchJobId'
    # p1
    # S'""5347907""'
    # p2
    # sS'WorkerNode'
    # p3
    # S'""node1358""'
    # p4
    # sS'JobStatus'
    # p5
    # S'2'
    # p6
    # s."
    self.assert_(re.search(r'BatchJobId[=\s"\'p1S]+%s' % backend_jobid, cache),
                 'Job %s not found in %s blahp cache:\n%s' % (backend_jobid, lrms.upper(), cache))
    os.chdir(cwd)
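# A self-contained check (illustrative, not part of the test suite) showing that
# the BatchJobId pattern used above matches both blahp cache layouts quoted in
# the comment: the old single-line format and the newer pickled format.
import re

old_format = '2: [BatchJobId="2"; WorkerNode="fermicloud171.fnal.gov-0"; JobStatus=4; ExitCode= 0; ]\n'
new_format = '5347907 "(dp0\nS\'BatchJobId\'\np1\nS\'""5347907""\'\np2\nsS\'WorkerNode\'\np3\n'

for jobid, cache in (('2', old_format), ('5347907', new_format)):
    # The character class soaks up the quoting/pickle noise between the key and the job ID.
    assert re.search(r'BatchJobId[=\s"\'p1S]+%s' % jobid, cache), jobid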
def check_execute(statements, message, database=None, exit=0):
    """Execute MySQL statements and check the exit code.

    `statements` must be a single string, but may contain multiple statements;
    this will be fed to `mysql` as a script. The trailing `;` is necessary even
    if executing a single statement. Query output is tab-separated.

    If `database` is specified, the given database is used.

    If the return code from the call does not match the expected exit code,
    an error is raised, and `message` is printed.

    Return (standard output, standard error, and the failure message generated
    by core.diagnose()).
    """
    return core.check_system(_get_command(database=database), message, stdin=statements, exit=exit)
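# A minimal usage sketch for check_execute() above (the statements and database
# name are illustrative assumptions). The statements are fed to mysql on stdin,
# so several semicolon-terminated statements can run in a single call.
output, _, _ = check_execute("SHOW TABLES; SELECT User FROM user;",
                             'List tables and users', database='mysql')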
def test_01_start_condor(self):
    core.config['condor.lockfile'] = '/var/lock/subsys/condor_master'
    core.state['condor.started-service'] = False
    core.state['condor.running-service'] = False

    if core.missing_rpm('condor'):
        return
    if os.path.exists(core.config['condor.lockfile']):
        core.state['condor.running-service'] = True
        core.skip('apparently running')
        return

    command = ('service', 'condor', 'start')
    stdout, _, fail = core.check_system(command, 'Start Condor')
    self.assert_(stdout.find('error') == -1, fail)
    self.assert_(os.path.exists(core.config['condor.lockfile']),
                 'Condor run lock file missing')
    core.state['condor.started-service'] = True
    core.state['condor.running-service'] = True
def test_07_xrootd_fetch_from_auth_cache(self):
    core.skip_ok_unless_installed('globus-proxy-utils', by_dependency=True)
    self.skip_bad_unless(core.state['proxy.valid'], 'requires a proxy cert')
    name, contents = self.testfiles[2]
    path = os.path.join(getcfg("OriginAuthExport"), name)
    os.environ["XrdSecGSISRVNAMES"] = "*"
    dest_file = '/tmp/testfileXrootdFromAuthCache'

    result, _, _ = core.check_system(
        ["xrdcp", "-d1", "-f",
         "root://%s:%d/%s" % (core.get_hostname(), getcfg("CacheHTTPSPort"), path),
         dest_file],
        "Checking xrootd copy from Authenticated cache", user=True)

    origin_file = os.path.join(getcfg("OriginRootdir"), getcfg("OriginAuthExport").lstrip("/"), name)
    checksum_match = files.checksum_files_match(origin_file, dest_file)
    self.assert_(checksum_match,
                 'Origin and file downloaded via cache have the same contents')
def run_job_in_tmp_dir(self, command, message, verify_environment=True):
    tmp_dir = tempfile.mkdtemp()
    old_cwd = os.getcwd()
    os.chdir(tmp_dir)
    os.chmod(tmp_dir, 0o777)

    try:
        stdout = core.check_system(command, message, user=True, timeout=600)[0]
    except osgunittest.TimeoutException:
        self.fail("Job failed to complete in 10 minute window")

    if verify_environment:
        self.verify_job_environment(stdout)

    os.chdir(old_cwd)
    shutil.rmtree(tmp_dir)
def test_03_pbs_job(self):
    if core.missing_rpm('globus-gram-job-manager-pbs', 'globus-gram-client-tools', 'globus-proxy-utils'):
        return
    if (not core.state['torque.pbs-configured'] or
            not core.state['torque.pbs-mom-running'] or
            not core.state['torque.pbs-server-running'] or
            not core.state['globus.pbs_configured']):
        core.skip('pbs not running or configured')
        return

    command = ('globus-job-run', self.contact_string('pbs'), '/bin/echo', 'hello')
    stdout = core.check_system(command, 'globus-job-run on PBS job', user=True)[0]
    self.assertEqual(stdout, 'hello\n', 'Incorrect output from globus-job-run on PBS job')
def test_04_singularity(self):
    core.skip_ok_unless_installed('singularity-runtime')
    core.skip_ok_unless_installed('cvmfs')
    core.skip_ok_unless_installed('cvmfs-keys', by_dependency=True)

    singularity_repo = 'singularity.opensciencegrid.org'
    command = ('ls', '/cvmfs/' + singularity_repo)
    core.check_system(command, "testing cvmfs access to singularity repo")

    command = ('ls', self.__cvmfs_image)
    core.check_system(command, "testing cvmfs access to singularity image")

    command = ('singularity', 'exec', '--bind', '/cvmfs', self.__cvmfs_image,
               'echo', 'working singularity image')
    core.check_system(command, "singularity checking a file")
def stop(service_name, fail_pattern='FAILED'):
    """Stop a service via an init script.

    'service_name' is used as the base of the keys in the core.config and
    core.state dictionaries.

    If we started the service, the init script is run by doing
    "service init_script stop". The regex 'fail_pattern' is matched against
    stdout. If there is a match, shutdown is considered to have failed. We
    also check that the sentinel file, if there was one, no longer exists.

    Globals used:
    core.config[service_name.init-script] is used to get the name of the init
    script. If not set, service_name is used.
    core.config[service_name.sentinel-file] is used to get the path of the
    sentinel file.
    core.state[service_name.started-service] is used to determine if we started
    the service. After shutdown, this is set to False.
    """
    init_script = core.config.get(service_name + '.init-script', service_name)
    if not core.state.get(service_name + '.started-service'):
        core.skip('did not start service ' + service_name)
        return

    command = ('service', init_script, 'stop')
    stdout, _, fail = core.check_system(command, 'Stop ' + service_name + ' service')
    assert re.search(fail_pattern, stdout) is None, fail

    sentinel_file = core.config.get(service_name + '.sentinel-file')
    if sentinel_file:
        assert not os.path.exists(sentinel_file), \
            "%(service_name)s sentinel file still exists at %(sentinel_file)s" % locals()

    core.state[service_name + '.started-service'] = False
def test_06_xrootd_fetch_from_origin_auth(self):
    core.skip_ok_unless_installed('globus-proxy-utils', by_dependency=True)
    self.skip_bad_unless(core.state['proxy.valid'], 'requires a proxy cert')
    name, contents = self.testfiles[0]
    path = os.path.join(getcfg("OriginAuthExport"), name)
    dest_file = '/tmp/testfileFromOriginAuth'
    os.environ["XrdSecGSISRVNAMES"] = "*"

    result, _, _ = core.check_system(
        ["xrdcp", "-d1", "-f",
         "root://localhost:%d/%s" % (getcfg("OriginAuthXrootPort"), path),
         dest_file],
        "Checking xrootd copy from authenticated origin", user=True)

    origin_file = os.path.join(getcfg("OriginRootdir"), getcfg("OriginAuthExport").lstrip("/"), name)
    checksum_match = files.checksum_files_match(origin_file, dest_file)
    self.assert_(checksum_match,
                 'Origin and directly downloaded file have the same contents')
def test_07_ping_with_gums(self):
    core.state['condor-ce.gums-auth'] = False
    self.general_requirements()
    core.skip_ok_unless_installed('gums-service')

    # Setting up GUMS auth using the instructions here:
    # https://opensciencegrid.github.io/docs/compute-element/install-htcondor-ce/#authentication-with-gums
    hostname = core.get_hostname()

    lcmaps_contents = '''gumsclient = "lcmaps_gums_client.mod"
             "-resourcetype ce"
             "-actiontype execute-now"
             "-capath /etc/grid-security/certificates"
             "-cert /etc/grid-security/hostcert.pem"
             "-key /etc/grid-security/hostkey.pem"
             "--cert-owner root"
# Change this URL to your GUMS server
             "--endpoint https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort"

verifyproxy = "lcmaps_verify_proxy.mod"
          "--allow-limited-proxy"
          " -certdir /etc/grid-security/certificates"

# lcmaps policies require at least two modules, so these are here to
# fill in if only one module is needed. "good | bad" has no effect.
good = "lcmaps_dummy_good.mod"
bad = "lcmaps_dummy_bad.mod"

authorize_only:
## Policy 1: GUMS but not SAZ (most common, default)
gumsclient -> good | bad
''' % hostname

    gums_properties_contents = '''gums.location=https://%s:8443/gums/services/GUMSAdmin
gums.authz=https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort
''' % (hostname, hostname)

    core.config['condor-ce.lcmapsdb'] = '/etc/lcmaps.db'
    core.config['condor-ce.gums-properties'] = '/etc/gums/gums-client.properties'
    core.config['condor-ce.gsi-authz'] = '/etc/grid-security/gsi-authz.conf'

    files.write(core.config['condor-ce.lcmapsdb'], lcmaps_contents, owner='condor-ce.gums')
    files.write(core.config['condor-ce.gums-properties'], gums_properties_contents, owner='condor-ce')
    files.replace(core.config['condor-ce.gsi-authz'],
                  '# globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                  'globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                  owner='condor-ce')
    try:
        core.state['condor-ce.gums-auth'] = True

        service.check_stop('condor-ce')

        stat = core.get_stat(core.config['condor-ce.collectorlog'])
        service.check_start('condor-ce')
        # Wait for the schedd to come back up
        self.failUnless(condor.wait_for_daemon(core.config['condor-ce.collectorlog'], stat, 'Schedd', 300.0),
                        'Schedd failed to restart within the 5 minute window')

        command = ('condor_ce_ping', 'WRITE', '-verbose')
        stdout, _, _ = core.check_system(command, 'ping using GSI and gridmap', user=True)
        self.assert_(re.search(r'Authorized:\s*TRUE', stdout), 'could not authorize with GSI')
    finally:
        files.restore(core.config['condor-ce.lcmapsdb'], 'condor-ce.gums')
        files.restore(core.config['condor-ce.gsi-authz'], 'condor-ce')
        files.restore(core.config['condor-ce.gums-properties'], 'condor-ce')
def config_and_restart(self):
    self.stop_rsv()
    core.check_system(('osg-configure', '-c', '-m', 'rsv'), 'osg-configure -c -m rsv')
    self.start_rsv()
def stop_rsv(self):
    core.check_system(('rsv-control', '--off'), 'rsv-control --off')
def start_rsv(self):
    core.check_system(('rsv-control', '--on'), 'rsv-control --on')
def test_01_yum_repositories(self):
    pre = ('rpm', '--verify', '--quiet', '--nomd5', '--nosize', '--nomtime')
    core.check_system(pre + ('epel-release',), 'Verify epel-release')
    core.check_system(pre + ('osg-release',), 'Verify osg-release')
def run_metric(self, metric, host=host):
    command = ('rsv-control', '--run', '--host', host, metric)
    stdout = core.check_system(command, ' '.join(command))[0]
    self.assert_(re.search('metricStatus: OK', stdout) is not None)
    return
def test_03_ping(self):
    self.general_requirements()
    command = ('condor_ce_ping', 'WRITE', '-verbose')
    stdout, _, _ = core.check_system(command, 'ping using GSI and gridmap', user=True)
    self.assert_(re.search(r'Authorized:\s*TRUE', stdout), 'could not authorize with GSI')
def test_04_grid_proxy_info(self):
    core.skip_ok_unless_installed('globus-proxy-utils')
    self.skip_bad_unless(core.state['proxy.created'], 'Proxy creation failed')
    command = ('grid-proxy-info', '-debug')
    core.check_system(command, 'Normal grid-proxy-info', user=True)
def test_01_status(self):
    self.general_requirements()
    command = ('condor_ce_status', '-any')
    core.check_system(command, 'ce status', user=True)
def get_transaction_id():
    """Grab the latest transaction ID from yum"""
    command = ('yum', 'history', 'info')
    history_out = core.check_system(command, 'Get yum Transaction ID')[0]
    m = re.search(r'Transaction ID : (\d+)', history_out)
    return int(m.group(1))
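# A self-contained illustration of the pattern get_transaction_id() applies to
# the "yum history info" header; the sample output below is an assumption, not
# captured from a real system.
import re

sample = "Transaction ID : 42\nBegin time     : Tue Jan 01 00:00:00 2019\n"
assert int(re.search(r'Transaction ID : (\d+)', sample).group(1)) == 42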
def test_02_queue(self):
    self.general_requirements()
    command = ('condor_ce_q', '-verbose')
    core.check_system(command, 'ce queue', user=True)
def _run_alternatives(java_type, a_input, message):
    command = ('alternatives', '--config', java_type)
    stdout, _, _ = core.check_system(command, message, stdin=a_input)
    return stdout