Exemple #1
0
    def test_05_start_pbs(self):
        """Start pbs_server and wait for the local node to register as free.

        Requires trqauthd to be running; skips OK if pbs_server is already up.
        Sets core.state['torque.nodes-up'] for later tests.
        """
        core.state['pbs_server.started-service'] = False
        core.state['torque.nodes-up'] = False

        core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
        self.skip_bad_unless(service.is_running('trqauthd'), 'pbs_server requires trqauthd')
        self.skip_ok_if(service.is_running('pbs_server'), 'pbs server already running')

        server_log = '/var/log/torque/server_logs/' + date.today().strftime('%Y%m%d')
        try:
            server_log_stat = os.stat(server_log)
        except OSError:
            # Log file may not exist yet; monitor_file treats None as a new file
            server_log_stat = None

        service.check_start('pbs_server')

        # Wait until the server is up before writing the rest of the config
        core.monitor_file(server_log, server_log_stat, '.*Server Ready.*', 60.0)
        core.check_system("echo '%s' | qmgr %s" % (self.pbs_config, core.get_hostname()),
                          "Configuring pbs server",
                          shell=True)

        # Wait up to 10 minutes (600 s) for the server to recognize the node.
        # BUG FIX: str.find() returns -1 (truthy) when the substring is absent,
        # so the original `if stdout.find('state = free')` was almost always true.
        start_time = time.time()
        while (time.time() - start_time) < 600:
            command = ('/usr/bin/qnodes', '-s', core.get_hostname())
            stdout, _, fail = core.check_system(command, 'Get pbs node info')
            self.assert_(stdout.find('error') == -1, fail)
            if 'state = free' in stdout:
                core.state['torque.nodes-up'] = True
                break
            time.sleep(5)  # poll interval; avoid a tight busy-loop
        if not core.state['torque.nodes-up']:
            self.fail('PBS nodes not coming up')
    def test_01_create_macaroons(self):
        """Obtain two macaroons for the xrootd third-party-copy endpoints.

        Stores the macaroons and their URLs in core.config; fails the test if
        either macaroon-init invocation exits non-zero. Skips unless the
        scitokens issuer client is installed and a valid proxy exists.
        """
        core.config['xrootd.tpc.macaroon-1'] = None
        core.config['xrootd.tpc.macaroon-2'] = None
        core.skip_ok_unless_installed('x509-scitokens-issuer-client', by_dependency=True)
        self.skip_bad_unless(core.state['proxy.valid'], 'requires a proxy cert')
        # CLEANUP: removed unused locals uid/usercert/userkey and the no-op
        # .strip() calls on whitespace-free string literals

        # First macaroon: source endpoint on port 9001
        core.config['xrootd.tpc.url-1'] = ("https://" + core.get_hostname() + ":9001" +
                                           "/usr/share/osg-test/test_gridftp_data.txt")
        command = ('macaroon-init', core.config['xrootd.tpc.url-1'], '20', 'ALL')
        status, stdout, stderr = core.system(command, user=True)
        fail = core.diagnose('Obtain Macaroon 1',
                             command, status, stdout, stderr)
        self.assertEqual(status, 0, fail)
        core.config['xrootd.tpc.macaroon-1'] = stdout.strip()

        # Second macaroon: destination endpoint on port 9002
        core.config['xrootd.tpc.url-2'] = ("https://" + core.get_hostname() + ":9002" +
                                           "/tmp/test_gridftp_data_tpc.txt")
        command = ('macaroon-init', core.config['xrootd.tpc.url-2'], '20', 'ALL')
        status, stdout, stderr = core.system(command, user=True)
        fail = core.diagnose('Obtain Macaroon 2',
                             command, status, stdout, stderr)
        self.assertEqual(status, 0, fail)
        core.config['xrootd.tpc.macaroon-2'] = stdout.strip()
Exemple #3
0
    def test_04_start_pbs(self):
        """Configure and start pbs_server (EL5/EL6 init-script era).

        Writes the nodes file, starts the daemon, pushes the qmgr config, then
        polls until the local node reports 'state = free'.
        """
        core.config['torque.pbs-lockfile'] = '/var/lock/subsys/pbs_server'
        core.state['torque.pbs-server-running'] = False
        core.state['torque.pbs-configured'] = False
        core.state['torque.nodes-up'] = False
        if core.el_release() == 5:
            core.config['torque.pbs-nodes-file'] = '/var/torque/server_priv/nodes'
        elif core.el_release() == 6:
            core.config['torque.pbs-nodes-file'] = '/var/lib/torque/server_priv/nodes'
        else:
            core.skip('Distribution version not supported')
            # BUG FIX: without this return the method falls through and raises
            # KeyError on the unset 'torque.pbs-nodes-file' entry below
            # (sibling skip branches in this file all pair core.skip with return)
            return

        if core.missing_rpm(*self.required_rpms):
            return
        if os.path.exists(core.config['torque.pbs-lockfile']):
            core.skip('pbs server apparently running')
            return

        # add the local node as a compute node
        files.write(core.config['torque.pbs-nodes-file'],
                    "%s np=1\n" % core.get_hostname(),
                    owner='pbs')
        command = ('service', 'pbs_server', 'start')
        stdout, _, fail = core.check_system(command, 'Start pbs server daemon')
        self.assert_(stdout.find('error') == -1, fail)
        self.assert_(os.path.exists(core.config['torque.pbs-lockfile']),
                     'pbs server run lock file missing')
        core.state['torque.pbs-server'] = True
        core.state['torque.pbs-server-running'] = True

        core.check_system("echo '%s' | qmgr %s" %
                          (self.pbs_config, core.get_hostname()),
                          "Configuring pbs server",
                          shell=True)
        core.state['torque.pbs-configured'] = True

        # Wait up to 10 minutes (600 s) for the node to come up; fail otherwise.
        # BUG FIX: str.find() returns -1 (truthy) when absent, so the original
        # condition `if stdout.find('state = free')` was nearly always true.
        start_time = time.time()
        while (time.time() - start_time) < 600:
            command = ('/usr/bin/qnodes', '-s', core.get_hostname())
            stdout, _, fail = core.check_system(command, 'Get pbs node info')
            self.assert_(stdout.find('error') == -1, fail)
            if 'state = free' in stdout:
                core.state['torque.nodes-up'] = True
                break
            time.sleep(5)  # poll interval; avoid a tight busy-loop
        if not core.state['torque.nodes-up']:
            self.fail('PBS nodes not coming up')
Exemple #4
0
 def test_08_https_fetch_from_auth_cache(self):
     """Copy a test file over HTTPS from the authenticated cache and verify
     its checksum matches the origin copy."""
     core.skip_ok_unless_installed('globus-proxy-utils',
                                   'gfal2-plugin-http',
                                   'gfal2-util',
                                   'gfal2-plugin-file',
                                   by_dependency=True)
     self.skip_bad_unless(core.state['proxy.valid'],
                          'requires a proxy cert')
     name, contents = self.testfiles[3]
     path = os.path.join(getcfg("OriginAuthExport"), name)
     dest_file = '/tmp/testfileHTTPsFromAuthCache'
     # Credentials live in the per-uid proxy file created earlier
     uid = pwd.getpwnam(core.options.username)[2]
     usercert = '/tmp/x509up_u%d' % uid
     userkey = '/tmp/x509up_u%d' % uid
     source_url = "https://%s:%d%s" % (core.get_hostname(),
                                       getcfg("CacheHTTPSPort"), path)
     result, _, _ = core.check_system(
         ["gfal-copy", "-vf",
          "--cert", usercert, "--key", userkey,
          source_url,
          "file://%s" % dest_file],
         "Checking xrootd copy from Authenticated cache", user=True)
     origin_file = os.path.join(getcfg("OriginRootdir"),
                                getcfg("OriginAuthExport").lstrip("/"),
                                name)
     self.assert_(files.checksum_files_match(origin_file, dest_file),
                  'Origin and file downloaded via cache have the same contents')
Exemple #5
0
 def test_01_gratia_admin_webpage(self):
     """Fetch the Gratia administration status page to check the service responds."""
     core.skip_ok_unless_installed('gratia-service')
     self.skip_bad_unless(core.state['tomcat.started'], 'Tomcat not started')
     admin_webpage = ('http://' + core.get_hostname() +
                      ':8880/gratia-administration/status.html?wantDetails=0')
     core.check_system(('curl', admin_webpage),
                       'Unable to launch gratia admin webpage')
Exemple #6
0
    def test_02_start_mom(self):
        """Write the pbs_mom configuration and start the pbs_mom daemon (EL5/EL6)."""
        core.config['torque.mom-lockfile'] = '/var/lock/subsys/pbs_mom'
        core.state['torque.pbs-mom-running'] = False

        if core.missing_rpm(*self.required_rpms):
            return
        if os.path.exists(core.config['torque.mom-lockfile']):
            core.skip('pbs mom apparently running')
            return

        if core.el_release() == 5:
            core.config['torque.mom-config'] = '/var/torque/mom_priv/config'
        elif core.el_release() == 6:
            core.config['torque.mom-config'] = '/var/lib/torque/mom_priv/config'
        else:
            core.skip('Distribution version not supported')
            # BUG FIX: without this return the method falls through and raises
            # KeyError on the unset 'torque.mom-config' entry below
            # (the other skip branches above already pair core.skip with return)
            return

        # Point the mom at the local pbs_server
        files.write(core.config['torque.mom-config'],
                    "$pbsserver %s\n" % core.get_hostname(),
                    owner='pbs')

        command = ('service', 'pbs_mom', 'start')
        stdout, _, fail = core.check_system(command, 'Start pbs mom daemon')
        self.assert_(stdout.find('error') == -1, fail)
        self.assert_(os.path.exists(core.config['torque.mom-lockfile']),
                     'PBS mom run lock file missing')
        core.state['torque.pbs-mom-running'] = True
Exemple #7
0
    def test_02_condor_ce_run_condor(self):
        """Submit a /bin/env job through condor_ce_run against the local CE."""
        core.skip_ok_unless_installed('htcondor-ce', 'htcondor-ce-client',
                                      'htcondor-ce-condor', 'condor')

        self.skip_bad_unless(service.is_running('condor-ce'), 'ce not running')
        self.skip_bad_unless(service.is_running('condor'), 'condor not running')
        self.skip_bad_unless(core.state['jobs.env-set'], 'job environment not set')
        token_file = core.config['token.condor_write']
        self.skip_bad_unless(core.state['proxy.valid'] or os.path.exists(token_file),
                             'requires a scitoken or a proxy')

        command = ['condor_ce_run', '--debug', '-r',
                   '%s:9619' % core.get_hostname(), '/bin/env']

        if not os.path.exists(token_file):
            core.log_message(
                'condor WRITE token not found; skipping SCITOKENS auth')
        else:
            # FIXME: After HTCONDOR-636 is released (targeted for HTCondor-CE 5.1.2),
            # we can stop setting _condor_SCITOKENS_FILE
            os.environ['_condor_SCITOKENS_FILE'] = token_file
            os.environ['BEARER_TOKEN_FILE'] = token_file

        # For condor 9.0.0-9.0.7 on OSG 3.6, run without the user's X.509
        # credentials (core.no_x509); otherwise submit normally
        needs_no_x509 = (core.osg_release() == "3.6"
                         and core.PackageVersion('condor') >= '9.0.0'
                         and core.PackageVersion('condor') < '9.0.8')
        if needs_no_x509:
            with core.no_x509(core.options.username):
                self.run_job_in_tmp_dir(command, 'condor_ce_run a Condor job')
        else:
            self.run_job_in_tmp_dir(command, 'condor_ce_run a Condor job')
    def test_01_request_condor_write_scitoken(self):
        """Request a demo SciToken with condor READ/WRITE scopes and write it to disk."""
        core.state['token.condor_write_created'] = False
        core.config['token.condor_write'] = '/tmp/condor_write.scitoken'

        core.skip_ok_unless_installed('htcondor-ce', 'condor')
        self.skip_ok_if(core.PackageVersion('condor') <= '8.9.4',
                        'HTCondor version does not support SciToken submission')
        self.skip_ok_if(os.path.exists(core.config['token.condor_write']),
                        'SciToken with HTCondor WRITE already exists')

        hostname = core.get_hostname()
        try:
            token = request_demo_scitoken('condor:/READ condor:/WRITE',
                                          audience=f'{hostname}:9619')
        except error.URLError as exc:
            self.fail(f"Failed to request token from demo.scitokens.org:\n{exc}")

        # Chown the token to the test user when that account is verified;
        # otherwise leave it owned by root (uid/gid 0)
        if core.state['user.verified']:
            user = pwd.getpwnam(core.options.username)
            ids = (user.pw_uid, user.pw_gid)
        else:
            ids = (0, 0)

        files.write(core.config['token.condor_write'],
                    core.to_str(token),
                    backup=False,
                    chown=ids)
        core.state['token.condor_write_created'] = True
Exemple #9
0
    def test_03_config_parameters(self):
        """Populate core.config with Gratia paths, credentials, and mysql query settings."""
        core.skip_ok_unless_installed('gratia-service')
        host = core.get_hostname()
        core.config['gratia.host'] = host
        core.config['gratia.config.dir'] = '/etc/gratia'
        # The name of the gratia directory changed at version 1.13.5
        version_parts = core.get_package_envra('gratia-service')[2].split('.')
        if self.tuple_cmp(version_parts, ['1', '13', '5']) < 0:
            core.config['gratia.directory'] = "collector"
        else:
            core.config['gratia.directory'] = "services"

        core.config['certs.httpcert'] = '/etc/grid-security/http/httpcert.pem'
        core.config['certs.httpkey'] = '/etc/grid-security/http/httpkey.pem'

        # Temporary defaults-extra-file carrying the mysql reader password
        filename = "/tmp/gratia_reader_pass.%s.txt" % str(os.getpid())
        files.write(filename, "[client]\npassword=reader\n", backup=False)
        core.config['gratia.sql.file'] = filename
        core.config['gratia.sql.querystring'] = (
            "\" | mysql --defaults-extra-file=\"" + core.config['gratia.sql.file'] +
            "\" --skip-column-names -B --unbuffered  --user=reader --port=3306")
        core.config['gratia.tmpdir.prefix'] = "/var/lib/gratia/tmp/gratiafiles/"
        core.config['gratia.tmpdir.postfix'] = "_%s_%s_8880" % (host, host)
        core.config['gratia.log.file'] = "/var/log/gratia-service/gratia.log"
        core.state['gratia.log.stat'] = None
Exemple #10
0
    def test_04_configure_pbs(self):
        """Create the initial pbs serverdb (if absent) and write the nodes file."""
        core.config['torque.pbs-nodes-file'] = '/var/lib/torque/server_priv/nodes'
        core.config['torque.pbs-serverdb'] = '/var/lib/torque/server_priv/serverdb'
        core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
        self.skip_bad_unless(service.is_running('trqauthd'),
                             'pbs_server requires trqauthd')
        self.skip_ok_if(service.is_running('pbs_server'),
                        'pbs server already running')

        files.preserve(core.config['torque.pbs-serverdb'], 'pbs')
        if not os.path.exists(core.config['torque.pbs-serverdb']):
            # Run pbs_server once in create mode to lay down a fresh serverdb,
            # give it time to settle, then shut it down again
            create_cmd = ('/usr/sbin/pbs_server -d /var/lib/torque -t create -f && '
                          'sleep 10 && /usr/bin/qterm')
            stdout, _, fail = core.check_system(
                create_cmd, 'create initial pbs serverdb config', shell=True)
            self.assert_(stdout.find('error') == -1, fail)

        # This gets wiped if we write it before the initial 'service pbs_server create'
        # However, this file needs to be in place before the service is started so we
        # restart the service after 'initial configuration'
        # (adds the local node as a compute node)
        files.write(core.config['torque.pbs-nodes-file'],
                    "%s np=1 num_node_boards=1\n" % core.get_hostname(),
                    owner='pbs')
Exemple #11
0
    def test_02_condor_ce_run_condor(self):
        """Run /bin/env through condor_ce_run on the local HTCondor-CE."""
        core.skip_ok_unless_installed('htcondor-ce', 'htcondor-ce-client',
                                      'htcondor-ce-condor', 'condor')

        self.skip_bad_unless(service.is_running('condor-ce'), 'ce not running')
        self.skip_bad_unless(service.is_running('condor'), 'condor not running')
        self.skip_bad_unless(core.state['jobs.env-set'], 'job environment not set')
        self.skip_bad_unless(core.state['proxy.valid']
                             or core.state['token.condor_write_created'],
                             'requires a scitoken or a proxy')

        command = ['condor_ce_run', '--debug', '-r',
                   '%s:9619' % core.get_hostname(), '/bin/env']

        if not core.state['token.condor_write_created']:
            core.log_message(
                'condor WRITE token not found; skipping SCITOKENS auth')
        else:
            # FIXME: After HTCONDOR-636 is released (targeted for HTCondor-CE 5.1.2),
            # we can stop setting _condor_SCITOKENS_FILE
            os.environ['_condor_SCITOKENS_FILE'] = core.config['token.condor_write']
            os.environ['BEARER_TOKEN_FILE'] = core.config['token.condor_write']

        self.run_job_in_tmp_dir(command, 'condor_ce_run a Condor job')
Exemple #12
0
    def test_03_config_parameters(self):
        """Record Gratia host, paths, and mysql reader settings in core.config."""
        core.skip_ok_unless_installed('gratia-service')
        hostname = core.get_hostname()
        core.config['gratia.host'] = hostname
        core.config['gratia.config.dir'] = '/etc/gratia'
        # The name of the gratia directory changed at version 1.13.5
        envra_version = core.get_package_envra('gratia-service')[2]
        if self.tuple_cmp(envra_version.split('.'), ['1', '13', '5']) < 0:
            core.config['gratia.directory'] = "collector"
        else:
            core.config['gratia.directory'] = "services"

        core.config['certs.httpcert'] = '/etc/grid-security/http/httpcert.pem'
        core.config['certs.httpkey'] = '/etc/grid-security/http/httpkey.pem'

        # Temporary defaults-extra-file carrying the mysql reader password
        pass_file = "/tmp/gratia_reader_pass." + str(os.getpid()) + ".txt"
        files.write(pass_file, "[client]\npassword=reader\n", backup=False)
        core.config['gratia.sql.file'] = pass_file
        core.config['gratia.sql.querystring'] = (
            "\" | mysql --defaults-extra-file=\"" + core.config['gratia.sql.file'] +
            "\" --skip-column-names -B --unbuffered  --user=reader --port=3306")
        core.config['gratia.tmpdir.prefix'] = "/var/lib/gratia/tmp/gratiafiles/"
        core.config['gratia.tmpdir.postfix'] = ("_" + hostname + "_" +
                                                hostname + "_8880")
        core.config['gratia.log.file'] = "/var/log/gratia-service/gratia.log"
        core.state['gratia.log.stat'] = None
    def run_trace(self, *args):
        """Run condor_ce_trace along with any additional *args. If trace completes with a held job, also return output
        from 'condor_ce_q -held'.

        Returns (trace_out, trace_err); calls self.fail() with diagnostics when
        condor_ce_trace exits non-zero.
        """
        cwd = os.getcwd()
        os.chdir('/tmp')
        self.command += ['condor_ce_trace', '--debug'
                         ] + list(args) + [core.get_hostname()]
        try:
            # For condor 9.0.0-9.0.7 on OSG 3.6, run without the user's X.509
            # credentials (core.no_x509); otherwise run normally
            if core.osg_release() == "3.6" and \
               core.PackageVersion('condor') >= '9.0.0' and \
               core.PackageVersion('condor') < '9.0.8':
                with core.no_x509(core.options.username):
                    trace_rc, trace_out, trace_err = core.system(self.command,
                                                                 user=True)
            else:
                trace_rc, trace_out, trace_err = core.system(self.command,
                                                             user=True)
        finally:
            # ROBUSTNESS: restore the working directory even if core.system raises
            os.chdir(cwd)

        if trace_rc:
            msg = 'condor_ce_trace failed'
            # BUG FIX: str.find() returns -1 (truthy) when absent, so the
            # original condition was effectively always true; additionally
            # hold_out/hold_err were unbound (NameError) when the branch was
            # not taken, so they are initialized here.
            hold_out = hold_err = ''
            if trace_out.find(', was held') != -1:
                msg = 'condor_ce_trace job held'
                _, hold_out, hold_err = core.system(('condor_ce_q', '-held'))
            self.fail(
                core.diagnose(msg, self.command, trace_rc,
                              str(trace_out) + str(hold_out),
                              str(trace_err) + str(hold_err)))

        return trace_out, trace_err
Exemple #14
0
    def modify_probeconfig(self, probeconfig):
        """Modify a Gratia ProbeConfig file to point at the local collector.

        Preserves the original file first, then rewrites the collector/SSL
        hosts, site name, and probe-enable flag via self.patternreplace.
        """
        #Backup the existing ProbeConfig, before any modification, so that it can be restored later
        #Note that "owner" has to be a unique string since "ProbeConfig" filename is the same for all probes
        #If ProbeConfig path is: /etc/gratia/gridftp-transfer/ProbeConfig, "owner" = "gridftp-transfer"
        owner = os.path.basename(os.path.dirname(probeconfig))
        files.preserve(probeconfig, owner)

        host = core.get_hostname()
        collectorhost = "    CollectorHost=\"" + host + ":8880\""
        sslhost = "    SSLHost=\"" + host + ":8443\""
        sslregistrationhost = "    SSLRegistrationHost=\"" + host + ":8880\""
        self.patternreplace(probeconfig, "CollectorHost", collectorhost)
        self.patternreplace(probeconfig, "SSLHost", sslhost)
        self.patternreplace(probeconfig, "SSLRegistrationHost",
                            sslregistrationhost)
        self.patternreplace(probeconfig, "SiteName",
                            "SiteName=\"OSG Test site\"")
        self.patternreplace(probeconfig, "EnableProbe", "EnableProbe=\"1\"")
        #If a line with QuarantineUnknownVORecords pattern is not found, insert it after QuarantineSize line
        # IDIOM FIX: `== False` comparison (PEP 8 E712) replaced with `not`
        if not self.patternreplace(probeconfig, "QuarantineUnknownVORecords=",
                                   "QuarantineUnknownVORecords=\"0\""):
            self.patternreplace(probeconfig,
                                "QuarantineSize=",
                                "QuarantineUnknownVORecords=\"0\"",
                                insert_after=True)
Exemple #15
0
    def test_02_start_mom(self):
        """Write the pbs_mom config and layout files, then start the pbs_mom service."""
        if core.el_release() <= 6:
            core.config['torque.mom-lockfile'] = '/var/lock/subsys/pbs_mom'
        else:
            core.config['torque.mom-lockfile'] = '/var/lib/torque/mom_priv/mom.lock'
        core.state['torque.pbs-mom-running'] = False

        core.skip_ok_unless_installed(*self.required_rpms)
        self.skip_ok_if(os.path.exists(core.config['torque.mom-lockfile']),
                        'pbs mom apparently running')

        # Point the mom at the local pbs_server
        core.config['torque.mom-config'] = '/var/lib/torque/mom_priv/config'
        files.write(core.config['torque.mom-config'],
                    "$pbsserver %s\n" % core.get_hostname(),
                    owner='pbs')
        core.config['torque.mom-layout'] = '/var/lib/torque/mom_priv/mom.layout'
        files.write(core.config['torque.mom-layout'],
                    "nodes=0",
                    owner='pbs')

        stdout, _, fail = core.check_system(('service', 'pbs_mom', 'start'),
                                            'Start pbs mom daemon')
        self.assert_(stdout.find('error') == -1, fail)
        self.assert_(os.path.exists(core.config['torque.mom-lockfile']),
                     'PBS mom run lock file missing')
        core.state['torque.pbs-mom-running'] = True
Exemple #16
0
 def test_01_gratia_admin_webpage(self):
     """Hit the Gratia administration status page with curl to verify it is up."""
     core.skip_ok_unless_installed('gratia-service')
     self.skip_bad_unless(core.state['tomcat.started'],
                          'Tomcat not started')
     page = ('http://%s:8880/gratia-administration/status.html?wantDetails=0'
             % core.get_hostname())
     core.check_system(('curl', page), 'Unable to launch gratia admin webpage')
Exemple #17
0
    def test_05_my_proxy_retrieval(self):
        """Retrieve the stored credential from the MyProxy server as the test user."""
        core.skip_ok_unless_installed('myproxy', 'myproxy-server')
        self.skip_bad_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')
        self.skip_bad_unless(core.state['myproxy.created'], 'MyProxy creation failed')

        command = ('myproxy-logon', '--verbose',
                   '-s', core.get_hostname(),
                   '-l', core.options.username)
        # The stored passphrase is fed to myproxy-logon on stdin
        core.check_system(command, 'myproxy-logon retrieval', user=True,
                          stdin=core.config['myproxy.password'] + '\n')
    def test_05_my_proxy_retrieval(self):
        """Log on to the MyProxy server and retrieve the previously stored credential."""
        core.skip_ok_unless_installed('myproxy', 'myproxy-server')
        self.skip_bad_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')
        self.skip_bad_unless(core.state['myproxy.created'], 'MyProxy creation failed')

        # Passphrase goes to myproxy-logon via stdin, newline-terminated
        password = core.config['myproxy.password'] + '\n'
        core.check_system(('myproxy-logon', '--verbose',
                           '-s', core.get_hostname(),
                           '-l', core.options.username),
                          'myproxy-logon retrieval', user=True, stdin=password)
Exemple #19
0
    def test_05_condor_ce_run_condor(self):
        """Submit a /bin/env job via condor_ce_run against the local CE."""
        core.skip_ok_unless_installed('htcondor-ce', 'htcondor-ce-client', 'htcondor-ce-condor', 'condor')

        # BUG FIX: the original passed the literal string 'condor-ce.started'
        # (always truthy) instead of the state flag, so this check could never
        # actually skip the test
        self.skip_bad_unless(core.state['condor-ce.started'], 'ce not started')
        self.skip_bad_unless(core.state['jobs.env-set'], 'job environment not set')

        command = ('condor_ce_run', '-r', '%s:9619' % core.get_hostname(), '/bin/env')
        self.run_job_in_tmp_dir(command, 'condor_ce_run a Condor job')
 def test_06_config_misc_file(self):
     """Replace the default GUMS host in the OSG misc config with the local hostname."""
     core.skip_ok_unless_installed('osg-info-services')
     core.skip_ok_unless_one_installed(*self.possible_rpms)
     misc_file = '/etc/osg/config.d/10-misc.ini'
     core.config['osg-info-services.misc-file'] = misc_file
     files.replace(misc_file,
                   'gums_host = DEFAULT',
                   'gums_host = ' + core.get_hostname(),
                   owner='root')
Exemple #21
0
 def test_01_remove_proxy(self):
     """Clear out any pre-existing MyProxy credential for the test user.

     If there is no pre-existing proxy file, myproxy-destroy produces error
     output and exits with status 1; because this is the expected (but not
     the only valid) case, neither output nor exit status is checked.
     """
     core.skip_ok_unless_installed('myproxy', 'myproxy-server')
     self.skip_ok_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')
     command = ('myproxy-destroy', '--verbose',
                '-s', core.get_hostname(),
                '-l', core.options.username)
     core.system(command, user=True)
 def test_01_remove_proxy(self):
     """Destroy any leftover MyProxy credential so later tests start clean.

     myproxy-destroy exits non-zero with error output when no proxy exists;
     that is the expected (though not the only valid) outcome here, so the
     result is deliberately unchecked.
     """
     core.skip_ok_unless_installed('myproxy', 'myproxy-server')
     self.skip_ok_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')
     core.system(('myproxy-destroy', '--verbose',
                  '-s', core.get_hostname(),
                  '-l', core.options.username),
                 user=True)
    def test_03_configure_ce(self):
        """Write the HTCondor-CE configuration: job routes for PBS, Slurm, and
        Condor, plus a certificate mapfile (extended with the host DN when a
        host certificate is in use)."""
        core.skip_ok_unless_installed('condor', 'htcondor-ce', 'htcondor-ce-client')

        # Set up Condor, PBS, and Slurm routes
        # Leave the GRIDMAP knob in tact to verify that it works with the LCMAPS VOMS plugin
        core.config['condor-ce.condor-ce-cfg'] = '/etc/condor-ce/config.d/99-osgtest.condor-ce.conf'
        # Add host DN to condor_mapfile
        if core.options.hostcert:
            core.config['condor-ce.condorce_mapfile'] = '/etc/condor-ce/condor_mapfile.osg-test'
            hostcert_dn, _ = cagen.certificate_info(core.config['certs.hostcert'])
            mapfile_contents = files.read('/etc/condor-ce/condor_mapfile')
            # Escape GSI metacharacters in the DN and prepend the mapping line.
            # NOTE(review): the "%[email protected]" fragment below looks like an
            # email-obfuscation/scraping artifact and is not a valid %-format
            # spec — the original format string should be confirmed upstream.
            mapfile_contents.insert(0, re.sub(r'([/=\.])', r'\\\1', "GSI \"^%s$\" " % hostcert_dn) + \
                                              "%[email protected]\n" % core.get_hostname())
            files.write(core.config['condor-ce.condorce_mapfile'],
                        mapfile_contents,
                        owner='condor-ce',
                        chmod=0o644)
        else:
            # No host cert: use the stock mapfile unchanged
            core.config['condor-ce.condorce_mapfile'] = '/etc/condor-ce/condor_mapfile'

        # Routes: PBS and Slurm via the batch gahp (TargetUniverse 9); anything
        # else falls through to the local Condor pool (TargetUniverse 5)
        condor_contents = """GRIDMAP = /etc/grid-security/grid-mapfile
CERTIFICATE_MAPFILE = %s
ALL_DEBUG=D_FULLDEBUG
JOB_ROUTER_DEFAULTS = $(JOB_ROUTER_DEFAULTS) [set_default_maxMemory = 128;]
JOB_ROUTER_ENTRIES = \\
   [ \\
     GridResource = "batch pbs"; \\
     TargetUniverse = 9; \\
     name = "Local_PBS"; \\
     Requirements = target.osgTestBatchSystem =?= "pbs"; \\
   ] \\
   [ \\
     GridResource = "batch slurm"; \\
     TargetUniverse = 9; \\
     name = "Local_Slurm"; \\
     Requirements = target.osgTestBatchSystem =?= "slurm"; \\
   ] \\
   [ \\
     TargetUniverse = 5; \\
     name = "Local_Condor"; \\
     Requirements = (target.osgTestBatchSystem =!= "pbs" && target.osgTestBatchSystem =!= "slurm"); \\
   ]

JOB_ROUTER_SCHEDD2_SPOOL=/var/lib/condor/spool
JOB_ROUTER_SCHEDD2_NAME=$(FULL_HOSTNAME)
JOB_ROUTER_SCHEDD2_POOL=$(FULL_HOSTNAME):9618
""" % core.config['condor-ce.condorce_mapfile']

        # The CE View needs extra daemons and its port recorded for later tests
        if core.rpm_is_installed('htcondor-ce-view'):
            condor_contents += "\nDAEMON_LIST = $(DAEMON_LIST), CEVIEW, GANGLIAD, SCHEDD"
            core.config['condor-ce.view-port'] = condor.ce_config_val('HTCONDORCE_VIEW_PORT')

        files.write(core.config['condor-ce.condor-ce-cfg'],
                    condor_contents,
                    owner='condor-ce',
                    chmod=0o644)
Exemple #24
0
def advertise_vomses(vo, hostcert='/etc/grid-security/hostcert.pem'):
    """Edit /etc/vomses to advertise the current host as the VOMS server for the given VO.
    Caller is responsible for preserving and restoring /etc/vomses.
    """
    host_dn, _ = cagen.certificate_info(hostcert)
    # One vomses line: "<vo>" "<host>" "<port>" "<host DN>" "<vo>"
    line = '"%s" "%s" "%d" "%s" "%s"\n' % (vo, core.get_hostname(),
                                           VOPORT, host_dn, vo)
    files.write('/etc/vomses', line, backup=False, chmod=0o644)
Exemple #25
0
def advertise_lsc(vo, hostcert='/etc/grid-security/vomsdir'.replace('vomsdir', 'hostcert.pem') if False else '/etc/grid-security/hostcert.pem'):
    """Create the VO directory and .lsc file under /etc/grid-security/vomsdir for the given VO."""
    subject_dn, issuer_dn = cagen.certificate_info(hostcert)
    vo_dir = os.path.join('/etc/grid-security/vomsdir', vo)
    if not os.path.isdir(vo_dir):
        os.makedirs(vo_dir)
    # The .lsc file is named after this host and lists the host cert's
    # subject DN followed by its issuer DN, one per line.
    lsc_path = os.path.join(vo_dir, core.get_hostname() + '.lsc')
    files.write(lsc_path, (subject_dn + '\n', issuer_dn + '\n'),
                backup=False,
                chmod=0o644)
 def test_07_ceview(self):
     """Verify the HTCondor-CE View web interface is up and serving its overview page."""
     # Pessimistically mark the View as down until the page is fetched successfully.
     core.config['condor-ce.view-listening'] = False
     self.general_requirements()
     core.skip_ok_unless_installed('htcondor-ce-view')
     # Port was recorded earlier from HTCONDORCE_VIEW_PORT; int() normalizes a stringy value.
     view_url = 'http://%s:%s' % (core.get_hostname(), int(core.config['condor-ce.view-port']))
     try:
         src = urllib2.urlopen(view_url).read()  # NOTE(review): urllib2 is Python 2 only
     except urllib2.URLError:
         self.fail('Could not reach HTCondor-CE View at %s' % view_url)
     self.assert_(re.search(r'HTCondor-CE Overview', src), 'Failed to find expected CE View contents')
     core.config['condor-ce.view-listening'] = True
 def test_08_ceview(self):
     """Re-check that the HTCondor-CE View web interface is up and serving its overview page."""
     # Pessimistically mark the View as down until the page is fetched successfully.
     core.config['condor-ce.view-listening'] = False
     self.general_requirements()
     core.skip_ok_unless_installed('htcondor-ce-view')
     # Port was recorded earlier from HTCONDORCE_VIEW_PORT; int() normalizes a stringy value.
     view_url = 'http://%s:%s' % (core.get_hostname(), int(core.config['condor-ce.view-port']))
     try:
         src = urllib2.urlopen(view_url).read()  # NOTE(review): urllib2 is Python 2 only
     except urllib2.URLError:
         self.fail('Could not reach HTCondor-CE View at %s' % view_url)
     self.assert_(re.search(r'HTCondor-CE Overview', src), 'Failed to find expected CE View contents')
     core.config['condor-ce.view-listening'] = True
    def test_04_trace(self):
        """Run condor_ce_trace against the local CE schedd."""
        self.general_requirements()
        self.skip_bad_unless(core.state['condor-ce.schedd-ready'], 'CE schedd not ready to accept jobs')

        # Run from /tmp so trace scratch files land in a world-writable directory.
        original_dir = os.getcwd()
        os.chdir('/tmp')

        core.check_system(('condor_ce_trace', '--debug', core.get_hostname()),
                          'ce trace',
                          user=True)

        os.chdir(original_dir)
    def test_04_trace(self):
        """Run condor_ce_trace against the local CE schedd."""
        self.general_requirements()
        self.skip_bad_unless(core.state['condor-ce.schedd-ready'], 'CE schedd not ready to accept jobs')

        # Run from /tmp so trace scratch files land in a world-writable directory.
        cwd = os.getcwd()
        os.chdir('/tmp')

        command = ('condor_ce_trace', '--debug', core.get_hostname())
        core.check_system(command, 'ce trace', user=True)

        # Restore the original working directory.
        os.chdir(cwd)
Exemple #30
0
 def test_03_start_trqauthd(self):
     """Write the Torque server_name file and start the trqauthd service."""
     core.state['trqauthd.started-service'] = False
     core.config['torque.pbs-servername-file'] = '/var/lib/torque/server_name'
     core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
     self.skip_ok_if(service.is_running('trqauthd'), 'trqauthd is already running')
     # trqauthd must see the real hostname (not localhost) as the server
     # name, so this config is written before the service is started.
     servername_file = core.config['torque.pbs-servername-file']
     files.write(servername_file, "%s" % core.get_hostname(), owner='pbs')
     service.check_start('trqauthd')
Exemple #31
0
 def test_03_start_trqauthd(self):
     """Write the Torque server_name file and start the trqauthd service."""
     core.state['trqauthd.started-service'] = False
     core.config[
         'torque.pbs-servername-file'] = '/var/lib/torque/server_name'
     core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
     self.skip_ok_if(service.is_running('trqauthd'),
                     'trqauthd is already running')
     # trqauthd must see the real hostname (not localhost) as the server
     # name, so this config is written before the service is started.
     files.write(core.config['torque.pbs-servername-file'],
                 "%s" % core.get_hostname(),
                 owner='pbs')
     service.check_start('trqauthd')
    def test_01_create_macaroons(self):
        """Obtain DOWNLOAD and UPLOAD macaroons for the two xrootd TPC endpoints.

        Stores the tokens in core.config['xrootd.tpc.macaroon-1'] and
        core.config['xrootd.tpc.macaroon-2'] for the later transfer tests.
        """
        core.skip_ok_unless_installed('xrootd', 'xrootd-scitokens', 'x509-scitokens-issuer-client', by_dependency=True)
        self.skip_bad_unless(core.state['proxy.created'], 'Proxy creation failed')

        # Macaroon one: DOWNLOAD token against the first xrootd server (port 9001).
        # (Removed: unused uid/usercert/userkey locals and a no-op .strip() on
        # the constant path literals.)
        core.config['xrootd.tpc.url-1'] = ("https://" + core.get_hostname() + ":9001" +
                                           "/usr/share/osg-test/test_gridftp_data.txt")
        command = ('macaroon-init', core.config['xrootd.tpc.url-1'], '20', 'DOWNLOAD')

        status, stdout, stderr = core.system(command, user=True)
        # NOTE(review): 'fail' is computed for its logging side effect but never
        # asserted on, matching the original behavior — a failed macaroon-init
        # is not fatal in this step.
        fail = core.diagnose('Obtain Macaroon one',
                             command, status, stdout, stderr)
        core.config['xrootd.tpc.macaroon-1'] = stdout.strip()

        # Macaroon two: UPLOAD token against the second xrootd server (port 9002).
        core.config['xrootd.tpc.url-2'] = ("https://" + core.get_hostname() + ":9002" +
                                           "/tmp/test_gridftp_data_tpc.txt")
        command = ('macaroon-init', core.config['xrootd.tpc.url-2'], '20', 'UPLOAD')
        status, stdout, stderr = core.system(command, user=True)
        fail = core.diagnose('Obtain Macaroon number two',
                             command, status, stdout, stderr)
        core.config['xrootd.tpc.macaroon-2'] = stdout.strip()
Exemple #33
0
    def test_02_condor_ce_run_condor(self):
        """Submit a simple /bin/env job through condor_ce_run to the local CE."""
        core.skip_ok_unless_installed('htcondor-ce', 'htcondor-ce-client',
                                      'htcondor-ce-condor', 'condor')

        # Both daemons and the job environment must be ready before submitting.
        self.skip_bad_unless(service.is_running('condor-ce'), 'ce not running')
        self.skip_bad_unless(service.is_running('condor'), 'condor not running')
        self.skip_bad_unless(core.state['jobs.env-set'], 'job environment not set')

        ce_endpoint = '%s:9619' % core.get_hostname()
        self.run_job_in_tmp_dir(('condor_ce_run', '-r', ce_endpoint, '/bin/env'),
                                'condor_ce_run a Condor job')
Exemple #34
0
    def test_05_start_pbs(self):
        """Start pbs_server, push the qmgr config, and wait for the node to report free."""
        core.state['pbs_server.started-service'] = False
        core.state['torque.nodes-up'] = False

        core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
        self.skip_bad_unless(service.is_running('trqauthd'),
                             'pbs_server requires trqauthd')
        self.skip_ok_if(service.is_running('pbs_server'),
                        'pbs server already running')

        # Snapshot today's server log (if present) so monitor_file only matches
        # lines written after this point.
        server_log = '/var/log/torque/server_logs/' + date.today().strftime(
            '%Y%m%d')
        try:
            server_log_stat = os.stat(server_log)
        except OSError:
            server_log_stat = None

        service.check_start('pbs_server')

        # Wait until the server is up before writing the rest of the config
        core.monitor_file(server_log, server_log_stat, '.*Server Ready.*',
                          60.0)
        core.check_system("echo '%s' | qmgr %s" %
                          (self.pbs_config, core.get_hostname()),
                          "Configuring pbs server",
                          shell=True)

        # Wait up to 10 minutes for the server to recognize the node.
        # (Comment previously said 5 minutes but the timeout is 600 s.)
        start_time = time.time()
        while (time.time() - start_time) < 600:
            command = ('/usr/bin/qnodes', '-s', core.get_hostname())
            stdout, _, fail = core.check_system(command, 'Get pbs node info')
            self.assert_(stdout.find('error') == -1, fail)
            # BUG FIX: str.find() returns -1 (truthy) when absent, so the old
            # `if stdout.find('state = free'):` test was almost always true.
            if 'state = free' in stdout:
                core.state['torque.nodes-up'] = True
                break
            time.sleep(5)  # don't hammer qnodes while waiting
        if not core.state['torque.nodes-up']:
            self.fail('PBS nodes not coming up')
    def test_04_myproxy_init(self):
        """Store the already-created proxy in the MyProxy server, protected by a password."""
        core.skip_ok_unless_installed('myproxy', 'myproxy-server')
        self.skip_bad_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')

        core.state['myproxy.created'] = False
        core.config['myproxy.password'] = '******'
        # (Removed a duplicated core.skip_ok_unless_installed('myproxy', 'myproxy-server')
        # call that repeated the check at the top of this test.)
        # The -S option makes myproxy-init read the password from stdin.
        command = ('myproxy-init', '--verbose', '-C', core.state['proxy.path'], '-y', core.state['proxy.path'],
                   '-s', core.get_hostname(), '-S', '-l', core.options.username)
        # Hand the existing proxy to MyProxy along with the password protecting it.
        password = core.config['myproxy.password']
        core.check_system(command, 'Normal myproxy-init', user=True, stdin=password)
        core.state['myproxy.created'] = True
Exemple #36
0
    def test_01_start_mom(self):
        """Write the pbs_mom config and layout files, then start the MOM daemon."""
        core.state['pbs_mom.started-service'] = False
        core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
        self.skip_ok_if(service.is_running('pbs_mom'), 'PBS mom already running')

        # Point the MOM at the local pbs_server.
        mom_config = '/var/lib/torque/mom_priv/config'
        core.config['torque.mom-config'] = mom_config
        files.write(mom_config, "$pbsserver %s\n" % core.get_hostname(), owner='pbs')

        # Single-board layout: this host is the only node.
        mom_layout = '/var/lib/torque/mom_priv/mom.layout'
        core.config['torque.mom-layout'] = mom_layout
        files.write(mom_layout, "nodes=0", owner='pbs')

        service.check_start('pbs_mom')
Exemple #37
0
    def test_01_start_mom(self):
        """Write the pbs_mom config and layout files, then start the MOM daemon."""
        core.state['pbs_mom.started-service'] = False
        core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
        self.skip_ok_if(service.is_running('pbs_mom'), 'PBS mom already running')

        # Point the MOM at the local pbs_server.
        core.config['torque.mom-config'] = '/var/lib/torque/mom_priv/config'
        files.write(core.config['torque.mom-config'],
                    "$pbsserver %s\n" % core.get_hostname(),
                    owner='pbs')
        # Single-board layout: this host is the only node.
        core.config['torque.mom-layout'] = '/var/lib/torque/mom_priv/mom.layout'
        files.write(core.config['torque.mom-layout'],
                    "nodes=0",
                    owner='pbs')
        service.check_start('pbs_mom')
Exemple #38
0
    def test_04_myproxy_init(self):
        """Store the already-created proxy in the MyProxy server, protected by a password."""
        core.skip_ok_unless_installed('myproxy', 'myproxy-server')
        self.skip_bad_unless(core.state['myproxy.started-server'], 'MyProxy server failed to start')

        core.state['myproxy.created'] = False
        core.config['myproxy.password'] = '******'
        # (Removed a duplicated core.skip_ok_unless_installed('myproxy', 'myproxy-server')
        # call that repeated the check at the top of this test.)
        # The -S option makes myproxy-init read the password from stdin.
        command = ('myproxy-init', '--verbose', '-C', core.state['proxy.path'], '-y', core.state['proxy.path'],
                   '-s', core.get_hostname(), '-S', '-l', core.options.username)
        # Hand the existing proxy to MyProxy along with the password protecting it.
        password = core.config['myproxy.password']
        core.check_system(command, 'Normal myproxy-init', user=True, stdin=password)
        core.state['myproxy.created'] = True
 def test_03_configure_globus_pbs(self):
     """Set pbs_default to this host in /etc/globus/globus-pbs.conf."""
     core.config['globus.pbs-config'] = '/etc/globus/globus-pbs.conf'
     core.state['globus.pbs_configured'] = False
     core.skip_ok_unless_installed('globus-gram-job-manager-pbs')
     config_file = file(core.config['globus.pbs-config']).read()  # NOTE(review): file() is Python 2 only
     server_name = core.get_hostname()
     # Replace an existing pbs_default line in place, or append one if absent.
     re_obj = re.compile('^pbs_default=.*$', re.MULTILINE)
     if 'pbs_default' in config_file:
         config_file = re_obj.sub("pbs_default=\"%s\"" % server_name, 
                                  config_file)
     else:
         config_file += "pbs_default=\"%s\"" % server_name
     files.write(core.config['globus.pbs-config'], config_file, owner='pbs')
     core.state['globus.pbs_configured'] = True
 def test_03_configure_globus_pbs(self):
     """Set pbs_default to this host in /etc/globus/globus-pbs.conf."""
     core.config['globus.pbs-config'] = '/etc/globus/globus-pbs.conf'
     core.state['globus.pbs_configured'] = False
     # Consistency fix: record an OK-skip like the sibling implementation
     # instead of silently passing with a bare return.
     core.skip_ok_unless_installed('globus-gram-job-manager-pbs')
     config_file = file(core.config['globus.pbs-config']).read()  # NOTE(review): file() is Python 2 only
     server_name = core.get_hostname()
     # Replace an existing pbs_default line in place, or append one if absent.
     re_obj = re.compile('^pbs_default=.*$', re.MULTILINE)
     if 'pbs_default' in config_file:
         config_file = re_obj.sub("pbs_default=\"%s\"" % server_name,
                                  config_file)
     else:
         config_file += "pbs_default=\"%s\"" % server_name
     # Consistency fix: pass owner as a keyword, matching the other
     # files.write() call sites in this file.
     files.write(core.config['globus.pbs-config'], config_file, owner='pbs')
     core.state['globus.pbs_configured'] = True
Exemple #41
0
 def test_07_ceview(self):
     """Verify the HTCondor-CE View page is up; on failure, dump CEViewLog for debugging."""
     # Pessimistically mark the View as down until the page is fetched successfully.
     core.config['condor-ce.view-listening'] = False
     core.skip_ok_unless_installed('htcondor-ce-view')
     # Port was recorded earlier from HTCONDORCE_VIEW_PORT; int() normalizes a stringy value.
     view_url = 'http://%s:%s' % (core.get_hostname(), int(core.config['condor-ce.view-port']))
     try:
         src = core.to_str(urlopen(view_url).read())
         core.log_message(src)
     except EnvironmentError as err:
         # Fetch failed: attach the CE View daemon log to the test output
         # before failing, to make triage possible from the logs alone.
         debug_file = '/var/log/condor-ce/CEViewLog'
         debug_contents = 'Contents of %s\n%s\n' % (debug_file, '=' * 20)
         try:
             debug_contents += files.read(debug_file, True)
         except EnvironmentError:
             debug_contents += 'Failed to read %s\n' % debug_file
         core.log_message(debug_contents)
         self.fail('Could not reach HTCondor-CE View at %s: %s' % (view_url, err))
     self.assertTrue(re.search(r'HTCondor-CE Overview', src), 'Failed to find expected CE View contents')
     core.config['condor-ce.view-listening'] = True
Exemple #42
0
 def test_07_xrootd_fetch_from_auth_cache(self):
     """Copy a test file through the authenticated xrootd cache and verify its contents."""
     core.skip_ok_unless_installed('globus-proxy-utils', by_dependency=True)
     self.skip_bad_unless(core.state['proxy.valid'],
                          'requires a proxy cert')
     name, contents = self.testfiles[2]
     path = os.path.join(getcfg("OriginAuthExport"), name)
     # Disable GSI server-name checking so the cert matches any host name.
     os.environ["XrdSecGSISRVNAMES"] = "*"
     dest_file = '/tmp/testfileXrootdFromAuthCache'
     # Fetch via the cache's HTTPS port rather than directly from the origin.
     result, _, _ = \
         core.check_system(["xrdcp", "-d1","-f",
                            "root://%s:%d/%s" % (core.get_hostname(),getcfg("CacheHTTPSPort"), path),
                            dest_file], "Checking xrootd copy from Authenticated cache", user=True)
     origin_file = os.path.join(getcfg("OriginRootdir"),
                                getcfg("OriginAuthExport").lstrip("/"),
                                name)
     # The downloaded file must be byte-identical to the origin copy.
     checksum_match = files.checksum_files_match(origin_file, dest_file)
     self.assert_(
         checksum_match,
         'Origin and file downloaded via cache have the same contents')
Exemple #43
0
    def test_04_configure_pbs(self):
        """Create the initial pbs serverdb (if needed) and register this host as a compute node."""
        core.config['torque.pbs-nodes-file'] = '/var/lib/torque/server_priv/nodes'
        core.config['torque.pbs-serverdb'] = '/var/lib/torque/server_priv/serverdb'
        core.skip_ok_unless_installed(*self.required_rpms, by_dependency=True)
        self.skip_bad_unless(service.is_running('trqauthd'), 'pbs_server requires trqauthd')
        self.skip_ok_if(service.is_running('pbs_server'), 'pbs server already running')

        files.preserve(core.config['torque.pbs-serverdb'], 'pbs')
        if not os.path.exists(core.config['torque.pbs-serverdb']):
            # Bootstrap the default serverdb, give the server time to settle,
            # then stop it with qterm.
            command = ('/usr/sbin/pbs_server -d /var/lib/torque -t create -f && '
                       'sleep 10 && /usr/bin/qterm')
            stdout, _, fail = core.check_system(command, 'create initial pbs serverdb config', shell=True)
            self.assert_(stdout.find('error') == -1, fail)

        # This gets wiped if we write it before the initial 'service pbs_server create'
        # However, this file needs to be in place before the service is started so we
        # restart the service after 'initial configuration'
        files.write(core.config['torque.pbs-nodes-file'], # add the local node as a compute node
                    "%s np=1 num_node_boards=1\n" % core.get_hostname(),
                    owner='pbs')
Exemple #44
0
    def modify_probeconfig(self, probeconfig):
        """Modify a Gratia ProbeConfig file for testing.

        Backs up the original file, points the collector/SSL endpoints at this
        host, sets a test site name, and enables the probe.

        probeconfig -- full path of the ProbeConfig file to edit
        """
        # Backup the existing ProbeConfig, before any modification, so that it can be restored later
        # Note that "owner" has to be a unique string since "ProbeConfig" filename is the same for all probes
        # If ProbeConfig path is: /etc/gratia/gridftp-transfer/ProbeConfig, "owner" = "gridftp-transfer"
        owner = os.path.basename(os.path.dirname(probeconfig))
        files.preserve(probeconfig, owner)

        host = core.get_hostname()
        collectorhost = "    CollectorHost=\"" + host + ":8880\""
        sslhost = "    SSLHost=\"" + host + ":8443\""
        sslregistrationhost = "    SSLRegistrationHost=\"" + host + ":8880\""
        self.patternreplace(probeconfig, "CollectorHost", collectorhost)
        self.patternreplace(probeconfig, "SSLHost", sslhost)
        self.patternreplace(probeconfig, "SSLRegistrationHost", sslregistrationhost)
        self.patternreplace(probeconfig, "SiteName", "SiteName=\"OSG Test site\"")
        self.patternreplace(probeconfig, "EnableProbe", "EnableProbe=\"1\"")
        # If a line with QuarantineUnknownVORecords pattern is not found, insert it after QuarantineSize line
        # (idiom fix: was "== False"; assumes patternreplace returns a bool — TODO confirm)
        if not self.patternreplace(probeconfig, "QuarantineUnknownVORecords=", "QuarantineUnknownVORecords=\"0\""):
            self.patternreplace(probeconfig, "QuarantineSize=", "QuarantineUnknownVORecords=\"0\"", insert_after=True)
 def test_08_config_site_info_file(self):
     """Fill in the required fields of 40-siteinfo.ini with test values for osg-info-services."""
     core.skip_ok_unless_installed('osg-info-services')
     core.skip_ok_unless_one_installed(*self.possible_rpms)
     core.config['osg-info-services.siteinfo-file'] = '/etc/osg/config.d/40-siteinfo.ini'
     # Only the first replacement keeps a backup (owner='root'); the later
     # edits modify the same file with backup=False.
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'group = OSG',
                   'group = OSG-ITB',
                   owner='root')
     files.replace_regexpr(core.config['osg-info-services.siteinfo-file'],
                   'host_name = *',
                   'host_name = ' + core.get_hostname(),
                   backup=False)
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'sponsor = UNAVAILABLE',
                   'sponsor = mis:100',
                   backup=False)
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'contact = UNAVAILABLE',
                   'contact = Lando Calrissian',
                   backup=False)
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'email = UNAVAILABLE',
                   'email = [email protected]',
                   backup=False)
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'city = UNAVAILABLE',
                   'city = Cloud City',
                   backup=False)
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'country = UNAVAILABLE',
                   'country = Bespin',
                   backup=False)
     files.replace_regexpr(core.config['osg-info-services.siteinfo-file'],
                           'longitude =*',
                           'longitude = -1',
                           backup=False)
     files.replace(core.config['osg-info-services.siteinfo-file'],
                   'latitude = UNAVAILABLE',
                   'latitude = 45',
                   backup=False)
Exemple #46
0
    def run_trace(self, *args):
        """Run condor_ce_trace along with any additional *args. If trace completes with a held job, also return output
        from 'condor_ce_q -held'.

        Returns (trace_out, trace_err); calls self.fail() with full diagnostics
        if the trace exits non-zero.
        """

        cwd = os.getcwd()
        os.chdir('/tmp')
        # NOTE(review): '+=' accumulates into self.command across calls; this
        # preserves the original behavior but assumes each test resets
        # self.command before calling run_trace — verify against callers.
        self.command += ['condor_ce_trace', '--debug'] + list(args) + [core.get_hostname()]
        trace_rc, trace_out, trace_err = core.system(self.command, user=True)
        os.chdir(cwd)

        if trace_rc:
            msg = 'condor_ce_trace failed'
            # BUG FIX: str.find() returns -1 (truthy) when the substring is
            # absent, so the old `if trace_out.find(', was held')` was almost
            # always true; hold_out/hold_err were also unbound whenever the
            # branch was not taken.
            hold_out = hold_err = ''
            if ', was held' in trace_out:
                msg = 'condor_ce_trace job held'
                _, hold_out, hold_err = core.system(('condor_ce_q', '-held'))
            self.fail(core.diagnose(msg,
                                    self.command,
                                    trace_rc,
                                    str(trace_out) + str(hold_out),
                                    str(trace_err) + str(hold_err)))

        return trace_out, trace_err
    def run_blahp_trace(self, lrms):
        """Run condor_ce_trace() against a non-HTCondor backend and verify the cache.

        lrms -- 'pbs' or 'slurm' (case-insensitive); selects the backend and
        the blahp results-cache directory to inspect.
        """
        lrms_cache_prefix = {'pbs': 'qstat', 'slurm': 'slurm'}

        cwd = os.getcwd()
        os.chdir('/tmp')
        # BUG FIX: restore the working directory even when check_system or an
        # assertion raises; previously a failure left the process in /tmp.
        try:
            command = ('condor_ce_trace', '-a osgTestBatchSystem = %s' % lrms.lower(), '--debug', core.get_hostname())
            trace_out, _, _ = core.check_system(command, 'ce trace against %s' % lrms.lower(), user=True)

            try:
                backend_jobid = re.search(r'%s_JOBID=(\d+)' % lrms.upper(), trace_out).group(1)
            except AttributeError:
                # failed to find backend job ID
                self.fail('did not run against %s' % lrms.upper())
            cache_file = '/var/tmp/%s_cache_%s/blahp_results_cache' % (lrms_cache_prefix[lrms.lower()],
                                                                       core.options.username)
            with open(cache_file, 'r') as handle:
                cache = handle.read()

            # Verify backend job ID in cache for multiple formats between the different
            # versions of the blahp. For blahp-1.18.16.bosco-1.osg32:
            #
            # 2: [BatchJobId="2"; WorkerNode="fermicloud171.fnal.gov-0"; JobStatus=4; ExitCode= 0; ]\n
            #
            # For blahp-1.18.25.bosco-1.osg33:
            #
            # 5347907	"(dp0
            # S'BatchJobId'
            # p1
            # S'""5347907""'
            # p2
            # sS'WorkerNode'
            # p3
            # S'""node1358""'
            # p4
            # sS'JobStatus'
            # p5
            # S'2'
            # p6
            # s."
            self.assert_(re.search(r'BatchJobId[=\s"\'p1S]+%s' % backend_jobid, cache),
                         'Job %s not found in %s blahp cache:\n%s' % (backend_jobid, lrms.upper(), cache))
        finally:
            os.chdir(cwd)
    def test_07_ping_with_gums(self):
        """Configure GUMS-based authentication for the CE, restart it, and verify an authorized ping.

        All modified config files are restored in the finally block regardless
        of the outcome.
        """
        core.state['condor-ce.gums-auth'] = False
        self.general_requirements()
        core.skip_ok_unless_installed('gums-service')

        # Setting up GUMS auth using the instructions here:
        # https://opensciencegrid.github.io/docs/compute-element/install-htcondor-ce/#authentication-with-gums
        hostname = core.get_hostname()

        lcmaps_contents = '''gumsclient = "lcmaps_gums_client.mod"
             "-resourcetype ce"
             "-actiontype execute-now"
             "-capath /etc/grid-security/certificates"
             "-cert   /etc/grid-security/hostcert.pem"
             "-key    /etc/grid-security/hostkey.pem"
             "--cert-owner root"
# Change this URL to your GUMS server
             "--endpoint https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort"

verifyproxy = "lcmaps_verify_proxy.mod"
          "--allow-limited-proxy"
          " -certdir /etc/grid-security/certificates"

# lcmaps policies require at least two modules, so these are here to
#   fill in if only one module is needed.  "good | bad" has no effect.
good        = "lcmaps_dummy_good.mod"
bad         = "lcmaps_dummy_bad.mod"

authorize_only:
## Policy 1: GUMS but not SAZ (most common, default)
gumsclient -> good | bad
''' % hostname

        gums_properties_contents = '''gums.location=https://%s:8443/gums/services/GUMSAdmin
gums.authz=https://%s:8443/gums/services/GUMSXACMLAuthorizationServicePort
''' % (hostname, hostname)

        core.config['condor-ce.lcmapsdb'] = '/etc/lcmaps.db'
        core.config['condor-ce.gums-properties'] = '/etc/gums/gums-client.properties'
        core.config['condor-ce.gsi-authz'] = '/etc/grid-security/gsi-authz.conf'

        # Write the GUMS config files and enable the lcmaps callout.
        files.write(core.config['condor-ce.lcmapsdb'], lcmaps_contents, owner='condor-ce.gums')
        files.write(core.config['condor-ce.gums-properties'], gums_properties_contents, owner='condor-ce')
        files.replace(core.config['condor-ce.gsi-authz'],
                      '# globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                      'globus_mapping liblcas_lcmaps_gt4_mapping.so lcmaps_callout',
                      owner='condor-ce')
        try:
            core.state['condor-ce.gums-auth'] = True

            service.check_stop('condor-ce')

            # Snapshot the collector log so wait_for_daemon only matches new entries.
            stat = core.get_stat(core.config['condor-ce.collectorlog'])

            service.check_start('condor-ce')
            # Wait for the schedd to come back up
            self.failUnless(condor.wait_for_daemon(core.config['condor-ce.collectorlog'], stat, 'Schedd', 300.0),
                            'Schedd failed to restart within the 1 min window')
            command = ('condor_ce_ping', 'WRITE', '-verbose')
            stdout, _, _ = core.check_system(command, 'ping using GSI and gridmap', user=True)
            self.assert_(re.search(r'Authorized:\s*TRUE', stdout), 'could not authorize with GSI')

        finally:
            # Restore the original configs whether or not the ping succeeded.
            files.restore(core.config['condor-ce.lcmapsdb'], 'condor-ce.gums')
            files.restore(core.config['condor-ce.gsi-authz'], 'condor-ce')
            files.restore(core.config['condor-ce.gums-properties'], 'condor-ce')
Exemple #49
0
    def test_04_start_pbs(self):
        """Configure and start pbs_server (with trqauthd), then wait for the node to report free."""
        # Lock file location differs between SysV (EL6) and systemd-era layouts.
        if core.el_release() <= 6:
            core.config['torque.pbs-lockfile'] = '/var/lock/subsys/pbs_server'
        else:
            core.config['torque.pbs-lockfile'] = '/var/lib/torque/server_priv/server.lock'
        core.state['trqauthd.started-service'] = False
        core.state['torque.pbs-server-running'] = False
        core.state['torque.pbs-server-started'] = False
        core.state['torque.pbs-configured'] = False
        core.state['torque.nodes-up'] = False
        core.config['torque.pbs-nodes-file'] = '/var/lib/torque/server_priv/nodes'
        core.config['torque.pbs-servername-file'] = '/var/lib/torque/server_name'

        core.skip_ok_unless_installed(*self.required_rpms)
        if os.path.exists(core.config['torque.pbs-lockfile']):
            core.state['torque.pbs-server-running'] = True
            self.skip_ok('pbs server apparently running')

        # set hostname as servername instead of localhost
        files.write(core.config['torque.pbs-servername-file'],
                    "%s" % core.get_hostname(),
                    owner='pbs')
        core.state['torque.pbs-configured'] = True

        # trqauthd is required for the pbs_server
        service.start('trqauthd')

        if not os.path.exists('/var/lib/torque/server_priv/serverdb'):
            if core.el_release() <= 6:
                command = 'service pbs_server create' # this creates the default config and starts the service
            else:
                # XXX: "service pbs_server create" doesn't work for systemd, and I haven't found a
                #      systemd equivalent to do the "create" step in el7 ... The following was
                #      distilled from the el6 init.d script:  (but please correct as necessary)
                command = ('/usr/sbin/pbs_server -d /var/lib/torque -t create -f && '
                           'sleep 10 && /usr/bin/qterm')

            stdout, _, fail = core.check_system(command, 'create initial pbs serverdb config', shell=True)
            self.assert_(stdout.find('error') == -1, fail)

        # This gets wiped if we write it before the initial 'service pbs_server create'
        # However, this file needs to be in place before the service is started so we
        # restart the service after 'initial configuration'
        files.write(core.config['torque.pbs-nodes-file'], # add the local node as a compute node
                    "%s np=1 num_node_boards=1\n" % core.get_hostname(),
                    owner='pbs')

        # Sometimes the restart command throws an error on stop but still manages
        # to kill the service, meaning that the service doesn't get brought back up
        command = ('service', 'pbs_server', 'stop')
        core.system(command, 'stop pbs server daemon')

        # Snapshot today's server log (if present) so monitor_file only matches new lines.
        server_log = '/var/log/torque/server_logs/' + date.today().strftime('%Y%m%d')
        try:
            server_log_stat = os.stat(server_log)
        except OSError:
            server_log_stat = None

        command = ('service', 'pbs_server', 'start')
        stdout, _, fail = core.check_system(command, 'Start pbs server daemon')
        self.assert_(stdout.find('error') == -1, fail)
        self.assert_(os.path.exists(core.config['torque.pbs-lockfile']),
                     'pbs server run lock file missing')
        core.state['torque.pbs-server-started'] = True
        core.state['torque.pbs-server-running'] = True

        # Wait until the server is up before writing the rest of the config
        core.monitor_file(server_log, server_log_stat, '.*Server Ready.*', 60.0)
        core.check_system("echo '%s' | qmgr %s" % (self.pbs_config, core.get_hostname()),
                          "Configuring pbs server",
                          shell=True)

        # Wait up to 10 minutes for the server to recognize the node.
        # (Comment previously said 5 minutes but the timeout is 600 s.)
        start_time = time.time()
        while (time.time() - start_time) < 600:
            command = ('/usr/bin/qnodes', '-s', core.get_hostname())
            stdout, _, fail = core.check_system(command, 'Get pbs node info')
            self.assert_(stdout.find('error') == -1, fail)
            # BUG FIX: str.find() returns -1 (truthy) when absent, so the old
            # `if stdout.find('state = free'):` test was almost always true.
            if 'state = free' in stdout:
                core.state['torque.nodes-up'] = True
                break
            time.sleep(5)  # don't hammer qnodes while waiting
        if not core.state['torque.nodes-up']:
            self.fail('PBS nodes not coming up')
Exemple #50
0
 def contact_string(self, jobmanager):
     """Return the Globus contact string for *jobmanager* on this host."""
     return '%s/jobmanager-%s' % (core.get_hostname(), jobmanager)
Exemple #51
0
import osgtest.library.core as core
import osgtest.library.files as files
import osgtest.library.mysql as mysql
import osgtest.library.osgunittest as osgunittest
import osgtest.library.service as service

import time

CLUSTER_NAME = 'osg_test'
CTLD_LOG = '/var/log/slurm/slurmctld.log'
SLURM_LOG = '/var/log/slurm/slurm.log'
SHORT_HOSTNAME = core.get_hostname().split('.')[0]

SLURMDBD_CONFIG = """AuthType=auth/munge
DbdHost=localhost
SlurmUser=slurm
DebugLevel=debug5
LogFile=/var/log/slurm/slurmdbd.log
StorageType=accounting_storage/mysql
StorageLoc=%(name)s
StorageUser=%(user)s
StoragePass=%(pass)s
"""

SLURM_CONFIG = """AccountingStorageHost=localhost
AccountingStorageLoc=/tmp/slurm_job_accounting.txt
AccountingStorageType=accounting_storage/slurmdbd
AuthType=auth/munge
ClusterName=%(cluster)s
ControlMachine=%(short_hostname)s
JobAcctGatherType=jobacct_gather/linux
    def run_blahp_trace(self, lrms):
        """Run condor_ce_trace() against a non-HTCondor backend and verify the cache.

        lrms -- 'pbs' or 'slurm' (case-insensitive); selects the backend and
        the blahp results-cache directory to inspect.
        """
        lrms_cache_prefix = {'pbs': 'qstat', 'slurm': 'slurm'}

        cwd = os.getcwd()
        os.chdir('/tmp')
        # BUG FIX: restore the working directory even when check_system or an
        # assertion raises; previously a failure left the process in /tmp.
        try:
            command = ('condor_ce_trace', '-a osgTestBatchSystem = %s' % lrms.lower(), '--debug', core.get_hostname())
            trace_out, _, _ = core.check_system(command, 'ce trace against %s' % lrms.lower(), user=True)

            try:
                backend_jobid = re.search(r'%s_JOBID=(\d+)' % lrms.upper(), trace_out).group(1)
            except AttributeError:
                # failed to find backend job ID
                self.fail('did not run against %s' % lrms.upper())
            cache_file = '/var/tmp/%s_cache_%s/blahp_results_cache' % (lrms_cache_prefix[lrms.lower()],
                                                                       core.options.username)
            with open(cache_file, 'r') as handle:
                cache = handle.read()

            # Verify backend job ID in cache for multiple formats between the different
            # versions of the blahp. For blahp-1.18.16.bosco-1.osg32:
            #
            # 2: [BatchJobId="2"; WorkerNode="fermicloud171.fnal.gov-0"; JobStatus=4; ExitCode= 0; ]\n
            #
            # For blahp-1.18.25.bosco-1.osg33:
            #
            # 5347907	"(dp0
            # S'BatchJobId'
            # p1
            # S'""5347907""'
            # p2
            # sS'WorkerNode'
            # p3
            # S'""node1358""'
            # p4
            # sS'JobStatus'
            # p5
            # S'2'
            # p6
            # s."
            self.assert_(re.search(r'BatchJobId[=\s"\'p1S]+%s' % backend_jobid, cache),
                         'Job %s not found in %s blahp cache:\n%s' % (backend_jobid, lrms.upper(), cache))
        finally:
            os.chdir(cwd)
import os
import socket
import shutil
import tempfile
import pwd

import osgtest.library.core as core
import osgtest.library.files as files
import osgtest.library.osgunittest as osgunittest
import osgtest.library.service as service
import osgtest.library.xrootd as xrootd

ERR_AUTH_FAIL = 52
ERR_PERMISSION_DENIED = 54

HOSTNAME = core.get_hostname() or "localhost"


def xrootd_record_failure(fn):
    """Decorator for xrootd tests that sets the core.state['xrootd.had-failures'] flag
    if there were any test failures.

    """
    def inner(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except (osgunittest.OkSkipException, osgunittest.BadSkipException,
                osgunittest.ExcludedException):
            raise
        except AssertionError:
            core.state['xrootd.had-failures'] = True