def test_03_start_slurm(self):
    """Start the Slurm service(s) and set this node's state to idle.

    Stores the service name(s) in ``core.config``, starts the service(s)
    unless the node service is already running, watches the controller and
    node logs for their start-up messages, and finally runs ``scontrol`` to
    put the node into the ``idle`` state.
    """
    core.config['slurm.service-name'] = 'slurm'
    if core.el_release() == 7:
        # On EL7 the node service name becomes 'slurmd' and the controller
        # gets its own service name.
        core.config['slurm.service-name'] += 'd'
        core.config['slurm.ctld-service-name'] = 'slurmctld'

    node_service = core.config['slurm.service-name']
    core.state['%s.started-service' % node_service] = False
    self.slurm_reqs()
    self.skip_ok_if(service.is_running(node_service), 'slurm already running')

    ctld_stat = core.get_stat(CTLD_LOG)

    on_el7 = core.el_release() == 7
    if on_el7:
        # slurmctld is handled by /etc/init.d/slurm on EL6
        core.check_system(['slurmctld'], 'enable slurmctld')
    service.check_start(node_service)
    if on_el7:
        service.check_start(core.config['slurm.ctld-service-name'])

    registration_msg = 'slurm_rpc_node_registration complete for %s' % SHORT_HOSTNAME
    core.monitor_file(CTLD_LOG, ctld_stat, registration_msg, 60.0)

    # NOTE(review): this stat is taken after the service start, unlike the
    # controller log stat above -- confirm the sentinel cannot be missed.
    slurmd_stat = core.get_stat(SLURM_LOG)
    core.monitor_file(SLURM_LOG, slurmd_stat, 'slurmd started', 60.0)

    idle_command = ['scontrol', 'update', 'nodename=%s' % SHORT_HOSTNAME, 'state=idle']
    core.check_system(idle_command, 'enable slurm node')
def test_03_start_slurm(self):
    """Bring up Slurm on this host and mark the node idle.

    The service name(s) are written to ``core.config``; the test is skipped
    (OK) when the node service is already running. After starting, the
    controller log and the node log are each monitored for up to 60 seconds
    for their start-up message, then ``scontrol`` sets the node to ``idle``.
    """
    core.config['slurm.service-name'] = 'slurm'
    if core.el_release() == 7:
        core.config['slurm.service-name'] += 'd'
        core.config['slurm.ctld-service-name'] = 'slurmctld'

    slurm_name = core.config['slurm.service-name']
    core.state['%s.started-service' % slurm_name] = False
    self.slurm_reqs()
    already_up = service.is_running(slurm_name)
    self.skip_ok_if(already_up, 'slurm already running')

    controller_stat = core.get_stat(CTLD_LOG)

    if core.el_release() != 7:
        service.check_start(slurm_name)
    else:
        # slurmctld is handled by /etc/init.d/slurm on EL6
        core.check_system(['slurmctld'], 'enable slurmctld')
        service.check_start(slurm_name)
        service.check_start(core.config['slurm.ctld-service-name'])

    core.monitor_file(CTLD_LOG,
                      controller_stat,
                      'slurm_rpc_node_registration complete for %s' % SHORT_HOSTNAME,
                      60.0)

    # NOTE(review): the node log stat is captured only after the service was
    # started -- confirm that 'slurmd started' cannot be logged before it.
    node_stat = core.get_stat(SLURM_LOG)
    core.monitor_file(SLURM_LOG, node_stat, 'slurmd started', 60.0)

    set_idle = ['scontrol', 'update', 'nodename=%s' % SHORT_HOSTNAME, 'state=idle']
    core.check_system(set_idle, 'enable slurm node')
def test_05_start_tomcat(self):
    """Start Tomcat and wait for its start-up message in the catalina log.

    Before starting, the Tomcat logging configuration is edited: the
    catalina log level is set to FINER and the ``1catalina`` file handler
    gets a prefix without the trailing dot plus ``rotatable = false``.
    ``core.state['tomcat.started']`` is set to True only once the start-up
    sentinel has been seen.
    """
    core.skip_ok_unless_installed(tomcat.pkgname())
    core.state['tomcat.started'] = False
    catalina_log = tomcat.catalinafile()

    # Snapshot the log before starting so only new output is inspected
    # (presumably how core.monitor_file uses the stat -- confirm).
    initial_stat = core.get_stat(catalina_log)

    tomcat_sentinel = r'Server startup in \d+ ms'

    # Bump log level
    logging_conf = os.path.join(tomcat.sysconfdir(), 'logging.properties')
    core.config['tomcat.logging-conf'] = logging_conf
    files.append(logging_conf, 'org.apache.catalina.level = %s\n' % 'FINER',
                 owner='tomcat', backup=True)

    old_str = "1catalina.org.apache.juli.FileHandler.prefix = catalina."
    repl_str = ("1catalina.org.apache.juli.FileHandler.prefix = catalina\n"
                "1catalina.org.apache.juli.FileHandler.rotatable = false")
    files.replace(logging_conf, old_str, repl_str, owner='tomcat', backup=False)

    service.check_start(tomcat.pkgname())

    # Nightly runs get a longer window (60 min instead of 20 min).
    timeout = 3600.0 if core.options.nightly else 1200.0
    line, gap = core.monitor_file(catalina_log, initial_stat, tomcat_sentinel, timeout)
    # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
    self.assertTrue(line is not None,
                    'Tomcat did not start within the %d min window' % int(timeout / 60))
    core.state['tomcat.started'] = True
    core.log_message('Tomcat started after %.1f seconds' % gap)
def test_18_execute_condor_meter(self):
    """Run the Gratia ``condor_meter`` probe and check its outbox.

    The test is skipped unless the required packages are installed, the
    condor logs were copied, and both condor-ce and condor are flagged as
    running in ``core.state``. On success
    ``core.state['gratia.condor-meter-running']`` is set to True.
    """
    core.state['gratia.condor-meter-running'] = False
    core.skip_ok_unless_installed('gratia-probe-condor', 'gratia-service', 'htcondor-ce-condor')
    self.skip_bad_if(not core.state['gratia.condor-logs-copied'])
    self.skip_bad_unless(core.state['condor-ce.started-service'], 'condor-ce not running')
    self.skip_bad_unless(core.state['condor.running-service'], message='Condor service not running')

    gratia_log = core.config['gratia.log.file']
    if os.path.exists(gratia_log):
        # Record the log's current stat in the shared state and log the
        # inode and size for debugging.
        core.state['gratia.log.stat'] = core.get_stat(gratia_log)
        core.log_message('stat.st_ino is: ' + str(core.state['gratia.log.stat'].st_ino))
        core.log_message('stat.st_size is: ' + str(core.state['gratia.log.stat'].st_size))

    command = ('/usr/share/gratia/condor/condor_meter',)
    core.check_system(command, 'Unable to execute condor_meter.')

    core.config['gratia.condor-temp-dir'] = (core.config['gratia.tmpdir.prefix'] +
                                             "subdir.condor" +
                                             core.config['gratia.tmpdir.postfix'])

    # The outbox can only be verified when the Gratia database is installed.
    if core.state['gratia.database-installed']:
        result = self.isProbeOutboxDirEmpty(core.config['gratia.condor-temp-dir'])
        # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
        self.assertTrue(result, 'condor outbox check failed.')
    core.state['gratia.condor-meter-running'] = True
def test_06_execute_gridftptransfer_probedriver(self):
    """Run the Gratia gridftp-transfer probe and check its outbox.

    The probe script name depends on the installed
    ``gratia-probe-gridftp-transfer`` version: ``gridftp-transfer_meter``
    from 1.17.0-1 on, ``GridftpTransferProbeDriver`` before that. On success
    ``core.state['gratia.gridftp-transfer-running']`` is set to True.
    """
    core.state['gratia.gridftp-transfer-running'] = False
    core.skip_ok_unless_installed('gratia-probe-gridftp-transfer', 'gratia-service',
                                  'globus-gridftp-server-progs', 'globus-ftp-client',
                                  'globus-proxy-utils', 'globus-gass-copy-progs')
    self.skip_ok_unless(core.state['gridftp.started-server'], 'gridftp server not running')
    self.skip_bad_unless(core.state['gratia.gridftp-logs-copied'], 'gridftp logs not copied')

    gratia_log = core.config['gratia.log.file']
    if os.path.exists(gratia_log):
        # Record the log's current stat in the shared state and log the
        # inode and size for debugging.
        core.state['gratia.log.stat'] = core.get_stat(gratia_log)
        core.log_message('stat.st_ino is: ' + str(core.state['gratia.log.stat'].st_ino))
        core.log_message('stat.st_size is: ' + str(core.state['gratia.log.stat'].st_size))

    if core.package_version_compare('gratia-probe-gridftp-transfer', '1.17.0-1') >= 0:
        probe_script = 'gridftp-transfer_meter'
    else:
        probe_script = 'GridftpTransferProbeDriver'
    command = ('/usr/share/gratia/gridftp-transfer/%s' % probe_script,)
    core.check_system(command, 'Unable to execute %s.' % probe_script)

    core.config['gratia.gridftp-temp-dir'] = (core.config['gratia.tmpdir.prefix'] +
                                              "subdir.gridftp-transfer" +
                                              core.config['gratia.tmpdir.postfix'])

    # The outbox can only be verified when the Gratia database is installed.
    if core.state['gratia.database-installed']:
        result = self.isProbeOutboxDirEmpty(core.config['gratia.gridftp-temp-dir'])
        # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
        self.assertTrue(result, 'gridftp-transfer outbox check failed.')
    core.state['gratia.gridftp-transfer-running'] = True
def test_04_start_condorce(self):
    """Start the condor-ce service and wait for its Schedd.

    Records the lock file path and collector log location in
    ``core.config``. If condor-ce is already running the Schedd is flagged
    ready and the test is skipped (OK); otherwise the service is started and
    ``core.state['condor-ce.schedd-ready']`` is set once
    ``condor.wait_for_daemon`` reports the Schedd within 300 seconds.
    """
    # The lock file location differs between EL7+ and older releases.
    lockfile = ('/var/lock/condor-ce/htcondor-ceLock'
                if core.el_release() >= 7
                else '/var/lock/subsys/condor-ce')
    core.config['condor-ce.lockfile'] = lockfile

    for flag in ('condor-ce.started-service', 'condor-ce.schedd-ready'):
        core.state[flag] = False

    core.skip_ok_unless_installed('condor', 'htcondor-ce', 'htcondor-ce-client')
    core.config['condor-ce.collectorlog'] = condor.ce_config_val('COLLECTOR_LOG')

    if service.is_running('condor-ce'):
        core.state['condor-ce.schedd-ready'] = True
        self.skip_ok('already running')

    collector_log = core.config['condor-ce.collectorlog']
    # Snapshot the collector log before the start so the wait below is
    # handed the pre-start state of the file.
    log_stat = core.get_stat(collector_log)

    service.check_start('condor-ce', timeout=20)

    schedd_up = condor.wait_for_daemon(collector_log, log_stat, 'Schedd', 300.0)
    if schedd_up:
        core.state['condor-ce.schedd-ready'] = True
def test_26_execute_sge(self):
    """Run the Gratia SGE probe cron script and check its outbox.

    Skipped unless the probe and service packages are installed and the SGE
    logs were copied. On success ``core.state['gratia.sge-running']`` is set
    to True.
    """
    core.skip_ok_unless_installed('gratia-probe-sge', 'gratia-service')
    core.state['gratia.sge-running'] = False
    self.skip_bad_if(not core.state['gratia.sge-logs-copied'])

    gratia_log = core.config['gratia.log.file']
    if os.path.exists(gratia_log):
        # Record the log's current stat in the shared state and log the
        # inode and size for debugging.
        core.state['gratia.log.stat'] = core.get_stat(gratia_log)
        core.log_message('stat.st_ino is: ' + str(core.state['gratia.log.stat'].st_ino))
        core.log_message('stat.st_size is: ' + str(core.state['gratia.log.stat'].st_size))

    command = ('/usr/share/gratia/sge/sge_meter.cron.sh',)
    core.check_system(command, 'Unable to execute sge_meter.')

    core.config['gratia.sge-temp-dir'] = (core.config['gratia.tmpdir.prefix'] +
                                          "subdir.sge" +
                                          core.config['gratia.tmpdir.postfix'])

    # The outbox can only be verified when the Gratia database is installed.
    if core.state['gratia.database-installed']:
        result = self.isProbeOutboxDirEmpty(core.config['gratia.sge-temp-dir'])
        # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
        self.assertTrue(result, 'sge outbox check failed.')
    core.state['gratia.sge-running'] = True
def test_01_start_condor(self):
    """Start the condor service unless it is already running.

    State flags written to ``core.state``:

    * ``condor.running-service`` -- True when condor is running at the end
      of this test, whether or not this test started it.
    * ``condor.started-service`` -- True only when this test started it.

    The collector log path and its pre-start stat are stored in
    ``core.config``.
    """
    # Initialise both flags up front so that later readers find them defined
    # even when this test is skipped or returns early.
    core.state['condor.started-service'] = False
    core.state['condor.running-service'] = False

    core.skip_ok_unless_installed('condor')
    core.config['condor.collectorlog'] = condor.config_val('COLLECTOR_LOG')

    if service.is_running('condor'):
        # Already up: flag it as running but not as started by us.
        core.state['condor.running-service'] = True
        return

    core.config['condor.collectorlog_stat'] = core.get_stat(core.config['condor.collectorlog'])
    service.check_start('condor')
    core.state['condor.started-service'] = True
    core.state['condor.running-service'] = True
def test_02_start_slurmdbd(self):
    """Create the Slurm accounting database, start slurmdbd, add the cluster.

    Creates the database and user through ``mysql.check_execute``, writes
    ``slurmdbd.conf`` from ``SLURMDBD_CONFIG``, starts the ``slurmdbd``
    service, and sets ``core.state['slurmdbd.ready']`` once the start-up
    message shows up in the slurmdbd log. Finally the cluster is registered
    with ``sacctmgr``.
    """
    core.state['slurmdbd.started-service'] = False
    core.state['slurmdbd.ready'] = False
    self.slurm_reqs()
    self.skip_bad_unless(mysql.is_running(), 'slurmdbd requires mysql')
    core.config['slurmdbd.config'] = os.path.join(core.config['slurm.config-dir'], 'slurmdbd.conf')
    # NOTE(review): this value looks like a masked placeholder; the
    # extraction further down expects a quoted 'user'@'host' style spec --
    # confirm the intended value.
    core.config['slurmdbd.user'] = "******"
    core.config['slurmdbd.name'] = "osg_test_slurmdb"

    db_name = core.config['slurmdbd.name']
    user_spec = core.config['slurmdbd.user']

    mysql.check_execute("create database %s; " % db_name, 'create slurmdb')
    mysql.check_execute("create user %s; " % user_spec, 'add slurmdb user')
    mysql.check_execute("grant usage on *.* to %s; " % user_spec, 'slurmdb user access')
    mysql.check_execute("grant all privileges on %s.* to %s identified by '%s'; "
                        % (db_name, user_spec, core.options.password),
                        'slurmdb user permissions')
    mysql.check_execute("flush privileges;", 'reload privileges')

    # The config file wants the bare user name: take the text between the
    # first pair of single quotes, or the whole spec when it is unquoted
    # (indexing [1] unconditionally raised IndexError in that case).
    spec_parts = user_spec.split('\'')
    db_user = spec_parts[1] if len(spec_parts) > 1 else user_spec

    files.write(core.config['slurmdbd.config'],
                SLURMDBD_CONFIG.format(name=db_name,
                                       user=db_user,
                                       password=core.options.password,
                                       port=mysql.PORT),
                owner='slurm',
                chmod=0o644)

    stat = core.get_stat(SLURMDBD_LOG)
    service.check_start('slurmdbd')
    # core.monitor_file is unpacked as a (line, gap) pair elsewhere in this
    # test suite; testing the pair itself for truth is always True, so look
    # at the matched line instead.
    line, _ = core.monitor_file(SLURMDBD_LOG, stat, 'slurmdbd version.+started', 30.0)
    if line is not None:
        core.state['slurmdbd.ready'] = True

    # Adding the cluster to the database
    command = ('sacctmgr', '-i', 'add', 'cluster', CLUSTER_NAME)
    core.check_system(command, 'add slurm cluster')
def test_18_execute_condor_meter(self):
    """Execute the Gratia ``condor_meter`` probe and verify its outbox.

    Requires the probe packages, copied condor logs, and condor-ce and
    condor flagged as running in ``core.state``; otherwise the test is
    skipped. ``core.state['gratia.condor-meter-running']`` ends up True only
    when all steps pass.
    """
    core.state['gratia.condor-meter-running'] = False
    core.skip_ok_unless_installed('gratia-probe-condor', 'gratia-service', 'htcondor-ce-condor')
    self.skip_bad_if(not core.state['gratia.condor-logs-copied'])
    self.skip_bad_unless(core.state['condor-ce.started-service'], 'condor-ce not running')
    self.skip_bad_unless(core.state['condor.running-service'], message='Condor service not running')

    log_path = core.config['gratia.log.file']
    if os.path.exists(log_path):
        # Keep the current stat of the Gratia log in the shared state and
        # log its inode and size.
        core.state['gratia.log.stat'] = core.get_stat(log_path)
        core.log_message('stat.st_ino is: ' + str(core.state['gratia.log.stat'].st_ino))
        core.log_message('stat.st_size is: ' + str(core.state['gratia.log.stat'].st_size))

    command = ('/usr/share/gratia/condor/condor_meter',)
    core.check_system(command, 'Unable to execute condor_meter.')

    core.config['gratia.condor-temp-dir'] = (core.config['gratia.tmpdir.prefix'] +
                                             "subdir.condor" +
                                             core.config['gratia.tmpdir.postfix'])

    # Only check the outbox when the Gratia database is installed.
    if core.state['gratia.database-installed']:
        result = self.isProbeOutboxDirEmpty(core.config['gratia.condor-temp-dir'])
        # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
        self.assertTrue(result, 'condor outbox check failed.')
    core.state['gratia.condor-meter-running'] = True
def test_01_start_condor(self):
    """Start condor with a personal-condor config unless it already runs.

    ``core.state['condor.running-service']`` ends up True whenever condor is
    running after this test; ``core.state['condor.started-service']`` is
    True only when this test performed the start. When a start is needed,
    ``99-personal-condor.conf`` is written into the last directory listed in
    condor's ``LOCAL_CONFIG_DIR`` and the collector log stat is recorded
    beforehand.
    """
    core.state['condor.started-service'] = False
    core.state['condor.running-service'] = False

    core.skip_ok_unless_installed('condor')
    core.config['condor.collectorlog'] = condor.config_val('COLLECTOR_LOG')

    if not service.is_running('condor'):
        # LOCAL_CONFIG_DIR may hold several comma/space separated entries;
        # the file goes into the last one.
        local_dirs = re.split(r'[, ]+', condor.config_val('LOCAL_CONFIG_DIR'))
        personal_conf = join(local_dirs[-1], '99-personal-condor.conf')
        core.config['condor.personal_condor'] = personal_conf
        files.write(personal_conf, personal_condor_config, owner='condor', chmod=0o644)

        collector_log = core.config['condor.collectorlog']
        core.config['condor.collectorlog_stat'] = core.get_stat(collector_log)
        service.check_start('condor')
        core.state['condor.started-service'] = True

    # Reached both when condor was already up and after a successful start.
    core.state['condor.running-service'] = True
def test_04_start_condorce(self):
    """Start the condor-ce service and wait for its Schedd to come up.

    Stores the lock file path and collector log location in
    ``core.config``. When condor-ce is already running the Schedd is flagged
    ready and the test is skipped (OK). Otherwise the service is started and
    ``core.state['condor-ce.schedd-ready']`` is set once
    ``condor.wait_for_daemon`` reports the Schedd within 300 seconds.
    """
    if core.el_release() >= 7:
        core.config['condor-ce.lockfile'] = '/var/lock/condor-ce/htcondor-ceLock'
    else:
        core.config['condor-ce.lockfile'] = '/var/lock/subsys/condor-ce'
    core.state['condor-ce.started-service'] = False
    core.state['condor-ce.schedd-ready'] = False

    core.skip_ok_unless_installed('condor', 'htcondor-ce', 'htcondor-ce-client')
    core.config['condor-ce.collectorlog'] = condor.ce_config_val('COLLECTOR_LOG')

    if service.is_running('condor-ce'):
        core.state['condor-ce.schedd-ready'] = True
        self.skip_ok('already running')

    # Snapshot the collector log BEFORE starting the service: taking the
    # stat afterwards races with a fast daemon start, whose log entry could
    # then already lie before the snapshot.
    stat = core.get_stat(core.config['condor-ce.collectorlog'])

    service.check_start('condor-ce')

    if condor.wait_for_daemon(core.config['condor-ce.collectorlog'], stat, 'Schedd', 300.0):
        core.state['condor-ce.schedd-ready'] = True
def test_26_execute_sge(self):
    """Execute the Gratia SGE probe cron script and verify its outbox.

    Skipped unless ``gratia-probe-sge`` and ``gratia-service`` are installed
    and the SGE logs were copied. ``core.state['gratia.sge-running']`` ends
    up True only when all steps pass.
    """
    core.skip_ok_unless_installed('gratia-probe-sge', 'gratia-service')
    core.state['gratia.sge-running'] = False
    self.skip_bad_if(not core.state['gratia.sge-logs-copied'])

    log_path = core.config['gratia.log.file']
    if os.path.exists(log_path):
        # Keep the current stat of the Gratia log in the shared state and
        # log its inode and size.
        core.state['gratia.log.stat'] = core.get_stat(log_path)
        core.log_message('stat.st_ino is: ' + str(core.state['gratia.log.stat'].st_ino))
        core.log_message('stat.st_size is: ' + str(core.state['gratia.log.stat'].st_size))

    command = ('/usr/share/gratia/sge/sge_meter.cron.sh',)
    core.check_system(command, 'Unable to execute sge_meter.')

    core.config['gratia.sge-temp-dir'] = (core.config['gratia.tmpdir.prefix'] +
                                          "subdir.sge" +
                                          core.config['gratia.tmpdir.postfix'])

    # Only check the outbox when the Gratia database is installed.
    if core.state['gratia.database-installed']:
        result = self.isProbeOutboxDirEmpty(core.config['gratia.sge-temp-dir'])
        # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
        self.assertTrue(result, 'sge outbox check failed.')
    core.state['gratia.sge-running'] = True
def test_06_execute_gridftptransfer_probedriver(self):
    """Execute the Gratia gridftp-transfer probe and verify its outbox.

    Which script is run depends on the installed
    ``gratia-probe-gridftp-transfer`` version (``gridftp-transfer_meter``
    for 1.17.0-1 and later, ``GridftpTransferProbeDriver`` otherwise).
    ``core.state['gratia.gridftp-transfer-running']`` ends up True only when
    all steps pass.
    """
    core.state['gratia.gridftp-transfer-running'] = False
    core.skip_ok_unless_installed('gratia-probe-gridftp-transfer', 'gratia-service',
                                  'globus-gridftp-server-progs', 'globus-ftp-client',
                                  'globus-proxy-utils', 'globus-gass-copy-progs')
    self.skip_ok_unless(core.state['gridftp.started-server'], 'gridftp server not running')
    self.skip_bad_unless(core.state['gratia.gridftp-logs-copied'], 'gridftp logs not copied')

    log_path = core.config['gratia.log.file']
    if os.path.exists(log_path):
        # Keep the current stat of the Gratia log in the shared state and
        # log its inode and size.
        core.state['gratia.log.stat'] = core.get_stat(log_path)
        core.log_message('stat.st_ino is: ' + str(core.state['gratia.log.stat'].st_ino))
        core.log_message('stat.st_size is: ' + str(core.state['gratia.log.stat'].st_size))

    if core.package_version_compare('gratia-probe-gridftp-transfer', '1.17.0-1') >= 0:
        probe_script = 'gridftp-transfer_meter'
    else:
        probe_script = 'GridftpTransferProbeDriver'
    command = ('/usr/share/gratia/gridftp-transfer/%s' % probe_script,)
    core.check_system(command, 'Unable to execute %s.' % probe_script)

    core.config['gratia.gridftp-temp-dir'] = (core.config['gratia.tmpdir.prefix'] +
                                              "subdir.gridftp-transfer" +
                                              core.config['gratia.tmpdir.postfix'])

    # Only check the outbox when the Gratia database is installed.
    if core.state['gratia.database-installed']:
        result = self.isProbeOutboxDirEmpty(core.config['gratia.gridftp-temp-dir'])
        # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
        self.assertTrue(result, 'gridftp-transfer outbox check failed.')
    core.state['gratia.gridftp-transfer-running'] = True
def test_05_start_tomcat(self):
    """Start the Tomcat service and wait for 'Server startup' in its log.

    The Tomcat ``logging.properties`` is modified first: the catalina level
    is set to FINER, and the ``1catalina`` file handler prefix loses its
    trailing dot and gets ``rotatable = false``.
    ``core.state['tomcat.started']`` becomes True only after the start-up
    sentinel was found in the catalina log.
    """
    core.skip_ok_unless_installed(tomcat.pkgname())
    core.state['tomcat.started'] = False
    catalina_log = tomcat.catalinafile()

    # Taken before the start; passed to core.monitor_file below.
    initial_stat = core.get_stat(catalina_log)

    tomcat_sentinel = r'Server startup in \d+ ms'

    # Bump log level
    core.config['tomcat.logging-conf'] = os.path.join(tomcat.sysconfdir(), 'logging.properties')
    files.append(core.config['tomcat.logging-conf'],
                 'org.apache.catalina.level = %s\n' % 'FINER',
                 owner='tomcat',
                 backup=True)

    old_str = "1catalina.org.apache.juli.FileHandler.prefix = catalina."
    repl_str = ("1catalina.org.apache.juli.FileHandler.prefix = catalina\n"
                "1catalina.org.apache.juli.FileHandler.rotatable = false")
    files.replace(core.config['tomcat.logging-conf'],
                  old_str,
                  repl_str,
                  owner='tomcat',
                  backup=False)

    service.check_start(tomcat.pkgname())

    # 60 minute window for nightly runs, 20 minutes otherwise.
    if core.options.nightly:
        timeout = 3600.0
    else:
        timeout = 1200.0
    line, gap = core.monitor_file(catalina_log, initial_stat, tomcat_sentinel, timeout)
    # assertTrue replaces the deprecated alias assert_ (removed in Python 3.12).
    self.assertTrue(line is not None,
                    'Tomcat did not start within the %d min window' % int(timeout / 60))
    core.state['tomcat.started'] = True
    core.log_message('Tomcat started after %.1f seconds' % gap)
def test_07_ping_with_gums(self):
    """Configure GUMS authentication, restart condor-ce and ping it.

    Writes an lcmaps.db and a gums-client.properties pointing at this host,
    enables the lcmaps callout in gsi-authz.conf, restarts condor-ce, waits
    up to 300 seconds for the Schedd, and asserts that ``condor_ce_ping``
    reports ``Authorized: TRUE``. The three configuration files are restored
    in a ``finally`` block.
    """
    core.state['condor-ce.gums-auth'] = False
    self.general_requirements()
    core.skip_ok_unless_installed('gums-service')

    # Setting up GUMS auth using the instructions here:
    # https://opensciencegrid.github.io/docs/compute-element/install-htcondor-ce/#authentication-with-gums
    hostname = core.get_hostname()

    lcmaps_contents = '''gumsclient = "lcmaps_gums_client.mod"
             "-resourcetype ce"
             "-actiontype execute-now"
             "-capath /etc/grid-security/certificates"
             "-cert /etc/grid-security/hostcert.pem"
             "-key /etc/grid-security/hostkey.pem"
             "--cert-owner root"
# Change this URL to your GUMS server
             "--endpoint https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort"

verifyproxy = "lcmaps_verify_proxy.mod"
          "--allow-limited-proxy"
          " -certdir /etc/grid-security/certificates"

# lcmaps policies require at least two modules, so these are here to
# fill in if only one module is needed. "good | bad" has no effect.
good = "lcmaps_dummy_good.mod"
bad = "lcmaps_dummy_bad.mod"

authorize_only:
## Policy 1: GUMS but not SAZ (most common, default)
gumsclient -> good | bad
''' % hostname

    gums_properties_contents = '''gums.location=https://%s:8443/gums/services/GUMSAdmin
gums.authz=https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort
''' % (hostname, hostname)

    core.config['condor-ce.lcmapsdb'] = '/etc/lcmaps.db'
    core.config['condor-ce.gums-properties'] = '/etc/gums/gums-client.properties'
    core.config['condor-ce.gsi-authz'] = '/etc/grid-security/gsi-authz.conf'

    files.write(core.config['condor-ce.lcmapsdb'], lcmaps_contents, owner='condor-ce.gums')
    files.write(core.config['condor-ce.gums-properties'], gums_properties_contents, owner='condor-ce')
    # Uncomment the lcmaps callout line.
    files.replace(core.config['condor-ce.gsi-authz'],
                  '# globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                  'globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                  owner='condor-ce')

    try:
        core.state['condor-ce.gums-auth'] = True

        service.check_stop('condor-ce')

        # Snapshot the collector log before the start so the wait below is
        # handed the pre-start state of the file.
        stat = core.get_stat(core.config['condor-ce.collectorlog'])

        service.check_start('condor-ce')
        # Wait for the schedd to come back up. assertTrue replaces the
        # deprecated alias failUnless (removed in Python 3.12); the message
        # now matches the 300 second timeout.
        self.assertTrue(condor.wait_for_daemon(core.config['condor-ce.collectorlog'], stat, 'Schedd', 300.0),
                        'Schedd failed to restart within the 5 min window')
        command = ('condor_ce_ping', 'WRITE', '-verbose')
        stdout, _, _ = core.check_system(command, 'ping using GSI and gridmap', user=True)
        self.assertTrue(re.search(r'Authorized:\s*TRUE', stdout), 'could not authorize with GSI')

    finally:
        # Always put the original configuration back, pass or fail.
        files.restore(core.config['condor-ce.lcmapsdb'], 'condor-ce.gums')
        files.restore(core.config['condor-ce.gsi-authz'], 'condor-ce')
        files.restore(core.config['condor-ce.gums-properties'], 'condor-ce')
def test_07_ping_with_gums(self):
    """Set up GUMS authentication, restart condor-ce and verify a ping.

    An lcmaps.db and a gums-client.properties that point at this host are
    written, the lcmaps callout in gsi-authz.conf is enabled, condor-ce is
    restarted, and the Schedd is awaited for up to 300 seconds. The test
    passes when ``condor_ce_ping`` prints ``Authorized: TRUE``. All three
    configuration files are restored afterwards, even on failure.
    """
    core.state['condor-ce.gums-auth'] = False
    self.general_requirements()
    core.skip_ok_unless_installed('gums-service')

    # Setting up GUMS auth using the instructions here:
    # https://opensciencegrid.github.io/docs/compute-element/install-htcondor-ce/#authentication-with-gums
    hostname = core.get_hostname()

    lcmaps_contents = '''gumsclient = "lcmaps_gums_client.mod"
             "-resourcetype ce"
             "-actiontype execute-now"
             "-capath /etc/grid-security/certificates"
             "-cert /etc/grid-security/hostcert.pem"
             "-key /etc/grid-security/hostkey.pem"
             "--cert-owner root"
# Change this URL to your GUMS server
             "--endpoint https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort"

verifyproxy = "lcmaps_verify_proxy.mod"
          "--allow-limited-proxy"
          " -certdir /etc/grid-security/certificates"

# lcmaps policies require at least two modules, so these are here to
# fill in if only one module is needed. "good | bad" has no effect.
good = "lcmaps_dummy_good.mod"
bad = "lcmaps_dummy_bad.mod"

authorize_only:
## Policy 1: GUMS but not SAZ (most common, default)
gumsclient -> good | bad
''' % hostname

    gums_properties_contents = '''gums.location=https://%s:8443/gums/services/GUMSAdmin
gums.authz=https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort
''' % (hostname, hostname)

    core.config['condor-ce.lcmapsdb'] = '/etc/lcmaps.db'
    core.config['condor-ce.gums-properties'] = '/etc/gums/gums-client.properties'
    core.config['condor-ce.gsi-authz'] = '/etc/grid-security/gsi-authz.conf'

    files.write(core.config['condor-ce.lcmapsdb'], lcmaps_contents, owner='condor-ce.gums')
    files.write(core.config['condor-ce.gums-properties'], gums_properties_contents, owner='condor-ce')
    # Turn the commented-out lcmaps callout line into an active one.
    files.replace(core.config['condor-ce.gsi-authz'],
                  '# globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                  'globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                  owner='condor-ce')

    try:
        core.state['condor-ce.gums-auth'] = True

        service.check_stop('condor-ce')

        # Stat of the collector log taken between stop and start; it is
        # passed to the wait below.
        stat = core.get_stat(core.config['condor-ce.collectorlog'])

        service.check_start('condor-ce')
        # Wait for the schedd to come back up. The deprecated alias
        # failUnless (removed in Python 3.12) is replaced by assertTrue, and
        # the message reflects the actual 300 second (5 min) timeout.
        schedd_back = condor.wait_for_daemon(core.config['condor-ce.collectorlog'], stat, 'Schedd', 300.0)
        self.assertTrue(schedd_back, 'Schedd failed to restart within the 5 min window')

        command = ('condor_ce_ping', 'WRITE', '-verbose')
        stdout, _, _ = core.check_system(command, 'ping using GSI and gridmap', user=True)
        self.assertTrue(re.search(r'Authorized:\s*TRUE', stdout), 'could not authorize with GSI')

    finally:
        # Restore the original configuration regardless of the outcome.
        files.restore(core.config['condor-ce.lcmapsdb'], 'condor-ce.gums')
        files.restore(core.config['condor-ce.gsi-authz'], 'condor-ce')
        files.restore(core.config['condor-ce.gums-properties'], 'condor-ce')