Example #1
    def begin(self):
        super(CephFSMirror, self).begin()
        testdir = misc.get_testdir(self.ctx)

        args = [
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'daemon-helper',
            'term',
        ]

        if 'valgrind' in self.config:
            args = get_valgrind_args(
                testdir, 'cephfs-mirror-{id}'.format(id=self.client), args,
                self.config.get('valgrind'))

        args.extend([
            'cephfs-mirror',
            '--cluster',
            self.cluster_name,
            '--id',
            self.client_id,
        ])

        self.ctx.daemons.add_daemon(
            self.remote,
            'cephfs-mirror',
            self.client,
            args=args,
            logger=self.log.getChild(self.client),
            stdin=run.PIPE,
            wait=False,
        )
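
For orientation: the method builds a wrapper preamble (adjust-ulimits, ceph-coverage, daemon-helper), optionally rewraps that preamble with valgrind, and only then appends the actual cephfs-mirror invocation. A minimal standalone sketch of that ordering is below; wrap_with_valgrind and its flags are illustrative stand-ins, not the real teuthology get_valgrind_args().

# Hypothetical sketch -- not the teuthology implementation. It only shows
# the ordering used above: preamble first, optional valgrind wrap, then the
# daemon command itself.
def wrap_with_valgrind(testdir, name, preamble, valgrind_opts):
    log_file = '{tdir}/archive/log/valgrind/{name}.log'.format(
        tdir=testdir, name=name)
    return list(preamble) + [
        'valgrind', '--trace-children=on',
        '--log-file={}'.format(log_file),
    ] + list(valgrind_opts)

testdir = '/home/ubuntu/cephtest'            # assumed test directory
config = {'valgrind': ['--tool=memcheck']}   # assumed task config
args = [
    'adjust-ulimits', 'ceph-coverage',
    '{tdir}/archive/coverage'.format(tdir=testdir),
    'daemon-helper', 'term',
]
if 'valgrind' in config:
    args = wrap_with_valgrind(testdir, 'cephfs-mirror-client.mirror',
                              args, config['valgrind'])
args.extend(['cephfs-mirror', '--cluster', 'ceph', '--id', 'mirror'])
print(' '.join(args))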
Example #2
    def _add_valgrind_args(self, mount_cmd):
        if self.client_config.get('valgrind') is not None:
            mount_cmd = get_valgrind_args(
                self.test_dir,
                'client.{id}'.format(id=self.client_id),
                mount_cmd,
                self.client_config.get('valgrind'),
                cd=False)

        return mount_cmd
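
Example #2 isolates the same conditional into a small helper: the mount command is rewritten only when the client config carries a 'valgrind' entry and is returned unchanged otherwise. A compact, self-contained illustration of that contract follows; the wrapping performed here is deliberately simplified and the config values are made up.

# Hypothetical illustration of the _add_valgrind_args() contract; the real
# helper delegates to get_valgrind_args() instead of prepending directly.
def maybe_wrap(client_config, mount_cmd):
    opts = client_config.get('valgrind')
    if opts is None:
        return mount_cmd
    return ['valgrind'] + list(opts) + list(mount_cmd)

assert maybe_wrap({}, ['ceph-fuse', '-f']) == ['ceph-fuse', '-f']
assert maybe_wrap({'valgrind': ['--tool=memcheck']},
                  ['ceph-fuse', '-f'])[0] == 'valgrind'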
Example #3
    def begin(self):
        super(RBDMirror, self).begin()
        testdir = misc.get_testdir(self.ctx)
        daemon_signal = 'kill'
        if 'coverage' in self.config or 'valgrind' in self.config or \
                self.config.get('thrash', False):
            daemon_signal = 'term'

        args = [
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'daemon-helper',
            daemon_signal,
        ]

        if 'valgrind' in self.config:
            args = get_valgrind_args(testdir,
                                     'rbd-mirror-{id}'.format(id=self.client),
                                     args, self.config.get('valgrind'))

        args.extend([
            'rbd-mirror',
            '--foreground',
            '--cluster',
            self.cluster_name,
            '--id',
            self.client_id,
        ])

        self.ctx.daemons.add_daemon(
            self.remote,
            'rbd-mirror',
            self.client,
            cluster=self.cluster_name,
            args=args,
            logger=self.log.getChild(self.client),
            stdin=run.PIPE,
            wait=False,
        )
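
Compared with Example #1, the rbd-mirror task also chooses which signal daemon-helper should forward on shutdown: 'term' whenever coverage, valgrind, or thrashing is configured (so the process can exit cleanly and flush its reports), 'kill' otherwise. That selection in isolation, with assumed config dicts:

# Sketch of the daemon_signal selection used above; the configs are examples.
def pick_daemon_signal(config):
    if 'coverage' in config or 'valgrind' in config or \
            config.get('thrash', False):
        return 'term'
    return 'kill'

assert pick_daemon_signal({}) == 'kill'
assert pick_daemon_signal({'valgrind': ['--tool=memcheck']}) == 'term'
assert pick_daemon_signal({'thrash': True}) == 'term'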
Example #4
    def _mount(self, mntopts, check_status):
        log.info("Client client.%s config is %s" %
                 (self.client_id, self.client_config))

        daemon_signal = 'kill'
        if self.client_config.get('coverage') or \
           self.client_config.get('valgrind') is not None:
            daemon_signal = 'term'

        # Use 0000 mode to prevent undesired modifications to the mountpoint on
        # the local file system.
        script = f'mkdir -m 0000 -p -v {self.hostfs_mntpt}'.split()
        stderr = StringIO()
        try:
            self.client_remote.run(args=script,
                                   timeout=(15 * 60),
                                   stderr=stderr)
        except CommandFailedError:
            if 'file exists' not in stderr.getvalue().lower():
                raise

        run_cmd = [
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=self.test_dir),
            'daemon-helper',
            daemon_signal,
        ]

        fuse_cmd = [
            'ceph-fuse',
            "-f",
            "--admin-socket",
            "/var/run/ceph/$cluster-$name.$pid.asok",
        ]
        if self.client_id is not None:
            fuse_cmd += ['--id', self.client_id]
        if self.client_keyring_path and self.client_id is not None:
            fuse_cmd += ['-k', self.client_keyring_path]
        if self.cephfs_mntpt is not None:
            fuse_cmd += ["--client_mountpoint=" + self.cephfs_mntpt]
        if self.cephfs_name is not None:
            fuse_cmd += ["--client_fs=" + self.cephfs_name]
        if mntopts:
            fuse_cmd += mntopts
        fuse_cmd.append(self.hostfs_mntpt)

        if self.client_config.get('valgrind') is not None:
            run_cmd = get_valgrind_args(
                self.test_dir,
                'client.{id}'.format(id=self.client_id),
                run_cmd,
                self.client_config.get('valgrind'),
                cd=False)

        netns_prefix = [
            'sudo', 'nsenter',
            '--net=/var/run/netns/{0}'.format(self.netns_name)
        ]
        run_cmd = netns_prefix + run_cmd

        run_cmd.extend(fuse_cmd)

        def list_connections():
            conn_dir = "/sys/fs/fuse/connections"

            self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
                                   check_status=False)
            self.client_remote.run(
                args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
                check_status=False,
                timeout=(30))

            try:
                ls_str = self.client_remote.sh("ls " + conn_dir,
                                               stdout=StringIO(),
                                               timeout=(15 * 60)).strip()
            except CommandFailedError:
                return []

            if ls_str:
                return [int(n) for n in ls_str.split("\n")]
            else:
                return []

        # Before starting ceph-fuse process, note the contents of
        # /sys/fs/fuse/connections
        pre_mount_conns = list_connections()
        log.info("Pre-mount connections: {0}".format(pre_mount_conns))

        mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()
        self.fuse_daemon = self.client_remote.run(
            args=run_cmd,
            logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
            stdin=run.PIPE,
            stdout=mountcmd_stdout,
            stderr=mountcmd_stderr,
            wait=False)

        # Wait for the connection reference to appear in /sys
        mount_wait = self.client_config.get('mount_wait', 0)
        if mount_wait > 0:
            log.info(
                "Fuse mount waits {0} seconds before checking /sys/".format(
                    mount_wait))
            time.sleep(mount_wait)
        timeout = int(self.client_config.get('mount_timeout', 30))
        waited = 0

        post_mount_conns = list_connections()
        while len(post_mount_conns) <= len(pre_mount_conns):
            if self.fuse_daemon.finished:
                # Did mount fail?  Raise the CommandFailedError instead of
                # hitting the "failed to populate /sys/" timeout
                try:
                    self.fuse_daemon.wait()
                except CommandFailedError as e:
                    log.info('mount command failed.')
                    if check_status:
                        raise
                    else:
                        return (e, mountcmd_stdout.getvalue(),
                                mountcmd_stderr.getvalue())
            time.sleep(1)
            waited += 1
            if waited > timeout:
                raise RuntimeError(
                    "Fuse mount failed to populate/sys/ after {} "
                    "seconds".format(waited))
            else:
                post_mount_conns = list_connections()

        log.info("Post-mount connections: {0}".format(post_mount_conns))

        # Record our fuse connection number so that we can use it when
        # forcing an unmount
        new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
        if len(new_conns) == 0:
            raise RuntimeError(
                "New fuse connection directory not found ({0})".format(
                    new_conns))
        elif len(new_conns) > 1:
            raise RuntimeError(
                "Unexpectedly numerous fuse connections {0}".format(new_conns))
        else:
            self._fuse_conn = new_conns[0]

        self.gather_mount_info()

        self.mounted = True
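
Beyond the valgrind wrapping, the mount helper above detects a successful ceph-fuse mount by diffing /sys/fs/fuse/connections before and after launching the daemon, then records the single new connection ID for later forced unmounts. A stripped-down sketch of that polling loop; list_connections and daemon are stand-ins for the remote helpers used in the real code.

# Hypothetical, simplified version of the wait loop above. `list_connections`
# returns the current connection ids and `daemon` mimics the teuthology
# remote process handle (a finished flag plus a wait() that raises on failure).
import time

def wait_for_new_fuse_conn(list_connections, daemon, timeout=30):
    pre = set(list_connections())
    waited = 0
    while True:
        new = set(list_connections()) - pre
        if len(new) == 1:
            return new.pop()          # connection id used for forced unmount
        if len(new) > 1:
            raise RuntimeError(
                'unexpectedly many new fuse connections: {}'.format(sorted(new)))
        if daemon.finished:
            daemon.wait()             # surface the mount failure, if any
        time.sleep(1)
        waited += 1
        if waited > timeout:
            raise RuntimeError(
                'fuse mount failed to populate /sys/fs/fuse/connections')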
Example #5
def _run_one_client(ctx, config, role):
    """Spawned task that runs the client"""
    krbd = config.get('krbd', False)
    nbd = config.get('nbd', False)
    testdir = teuthology.get_testdir(ctx)
    (remote, ) = ctx.cluster.only(role).remotes.keys()

    args = []
    if krbd or nbd:
        args.append('sudo')  # rbd(-nbd) map/unmap need privileges
    args.extend([
        'adjust-ulimits', 'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir)
    ])

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('rbd_fsx', {}))

    if config.get('valgrind'):
        args = get_valgrind_args(testdir, 'fsx_{id}'.format(id=role), args,
                                 config.get('valgrind'))

    cluster_name, type_, client_id = teuthology.split_role(role)
    if type_ != 'client':
        msg = 'client role ({0}) must be a client'.format(role)
        raise ConfigError(msg)

    args.extend([
        'ceph_test_librbd_fsx',
        '--cluster',
        cluster_name,
        '--id',
        client_id,
        '-d',  # debug output for all operations
        '-W',
        '-R',  # mmap doesn't work with rbd
        '-p',
        str(config.get('progress_interval', 100)),  # show progress
        '-P',
        '{tdir}/archive'.format(tdir=testdir),
        '-r',
        str(config.get('readbdy', 1)),
        '-w',
        str(config.get('writebdy', 1)),
        '-t',
        str(config.get('truncbdy', 1)),
        '-h',
        str(config.get('holebdy', 1)),
        '-l',
        str(config.get('size', 250000000)),
        '-S',
        str(config.get('seed', 0)),
        '-N',
        str(config.get('ops', 1000)),
    ])
    if krbd:
        args.append('-K')  # -K enables krbd mode
    if nbd:
        args.append('-M')  # -M enables nbd mode
    if config.get('direct_io', False):
        args.append('-Z')  # -Z use direct IO
    if not config.get('randomized_striping', True):
        args.append('-U')  # -U disables randomized striping
    if not config.get('punch_holes', True):
        args.append('-H')  # -H disables discard ops
    if config.get('deep_copy', False):
        args.append('-g')  # -g deep copy instead of clone
    if config.get('journal_replay', False):
        args.append('-j')  # -j replay all IO events from journal
    if config.get('keep_images', False):
        args.append('-k')  # -k keep images on success
    args.extend([
        config.get('pool_name', 'pool_{pool}'.format(pool=role)),
        'image_{image}'.format(image=role),
    ])

    remote.run(args=args)
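
The boolean options near the end of the function map one-to-one onto ceph_test_librbd_fsx flags. The same mapping can be written table-driven, which keeps the flag semantics in one place; this is just a refactoring sketch of the code above, not the teuthology source.

# Sketch: the config-to-flag mapping from Example #5 as a table of
# (flag, config key, default, append-when-value) entries.
_FLAG_TABLE = [
    ('-K', 'krbd', False, True),                 # enable krbd mode
    ('-M', 'nbd', False, True),                  # enable nbd mode
    ('-Z', 'direct_io', False, True),            # use direct IO
    ('-U', 'randomized_striping', True, False),  # disable randomized striping
    ('-H', 'punch_holes', True, False),          # disable discard ops
    ('-g', 'deep_copy', False, True),            # deep copy instead of clone
    ('-j', 'journal_replay', False, True),       # replay IO events from journal
    ('-k', 'keep_images', False, True),          # keep images on success
]

def fsx_bool_flags(config):
    return [flag for flag, key, default, when in _FLAG_TABLE
            if bool(config.get(key, default)) == when]

assert fsx_bool_flags({'krbd': True, 'punch_holes': False}) == ['-K', '-H']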
Example #6
def start_rgw(ctx, config, clients):
    """
    Start rgw on remote sites.
    """
    log.info('Starting rgw...')
    testdir = teuthology.get_testdir(ctx)
    for client in clients:
        (remote, ) = ctx.cluster.only(client).remotes.keys()
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_id = daemon_type + '.' + client_id
        client_with_cluster = cluster_name + '.' + client_with_id

        client_config = config.get(client)
        if client_config is None:
            client_config = {}
        log.info("rgw %s config is %s", client, client_config)
        cmd_prefix = [
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'daemon-helper',
            'term',
        ]

        rgw_cmd = ['radosgw']

        log.info("Using %s as radosgw frontend", ctx.rgw.frontend)

        endpoint = ctx.rgw.role_endpoints[client]
        frontends = ctx.rgw.frontend
        frontend_prefix = client_config.get('frontend_prefix', None)
        if frontend_prefix:
            frontends += ' prefix={pfx}'.format(pfx=frontend_prefix)

        if endpoint.cert:
            # add the ssl certificate path
            frontends += ' ssl_certificate={}'.format(
                endpoint.cert.certificate)
            if ctx.rgw.frontend == 'civetweb':
                frontends += ' port={}s'.format(endpoint.port)
            else:
                frontends += ' ssl_port={}'.format(endpoint.port)
        else:
            frontends += ' port={}'.format(endpoint.port)

        rgw_cmd.extend([
            '--rgw-frontends',
            frontends,
            '-n',
            client_with_id,
            '--cluster',
            cluster_name,
            '-k',
            '/etc/ceph/{client_with_cluster}.keyring'.format(
                client_with_cluster=client_with_cluster),
            '--log-file',
            '/var/log/ceph/rgw.{client_with_cluster}.log'.format(
                client_with_cluster=client_with_cluster),
            '--rgw_ops_log_socket_path',
            '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(
                tdir=testdir, client_with_cluster=client_with_cluster),
        ])

        keystone_role = client_config.get('use-keystone-role', None)
        if keystone_role is not None:
            if not ctx.keystone:
                raise ConfigError('rgw must run after the keystone task')
            url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(
                host=endpoint.hostname, port=endpoint.port)
            ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url)

            keystone_host, keystone_port = \
                ctx.keystone.public_endpoints[keystone_role]
            rgw_cmd.extend([
                '--rgw_keystone_url',
                'http://{khost}:{kport}'.format(khost=keystone_host,
                                                kport=keystone_port),
            ])

        if client_config.get('dns-name') is not None:
            rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name])
        if client_config.get('dns-s3website-name') is not None:
            rgw_cmd.extend(
                ['--rgw-dns-s3website-name', endpoint.website_dns_name])

        vault_role = client_config.get('use-vault-role', None)
        barbican_role = client_config.get('use-barbican-role', None)
        pykmip_role = client_config.get('use-pykmip-role', None)

        token_path = teuthology.get_testdir(ctx) + '/vault-token'
        if barbican_role is not None:
            if not hasattr(ctx, 'barbican'):
                raise ConfigError('rgw must run after the barbican task')

            barbican_host, barbican_port = \
                ctx.barbican.endpoints[barbican_role]
            log.info("Use barbican url=%s:%s", barbican_host, barbican_port)

            rgw_cmd.extend([
                '--rgw_barbican_url',
                'http://{bhost}:{bport}'.format(bhost=barbican_host,
                                                bport=barbican_port),
            ])
        elif vault_role is not None:
            if not ctx.vault.root_token:
                raise ConfigError('vault: no "root_token" specified')
            # create token on file
            ctx.rgw.vault_role = vault_role
            ctx.cluster.only(client).run(args=[
                'echo', '-n', ctx.vault.root_token,
                run.Raw('>'), token_path
            ])
            log.info("Token file content")
            ctx.cluster.only(client).run(args=['cat', token_path])
            log.info("Restrict access to token file")
            ctx.cluster.only(client).run(args=['chmod', '600', token_path])
            ctx.cluster.only(client).run(
                args=['sudo', 'chown', 'ceph', token_path])

            rgw_cmd.extend([
                '--rgw_crypt_vault_addr',
                "{}:{}".format(*ctx.vault.endpoints[vault_role]),
                '--rgw_crypt_vault_token_file', token_path
            ])
        elif pykmip_role is not None:
            if not hasattr(ctx, 'pykmip'):
                raise ConfigError('rgw must run after the pykmip task')
            ctx.rgw.pykmip_role = pykmip_role
            rgw_cmd.extend([
                '--rgw_crypt_kmip_addr',
                "{}:{}".format(*ctx.pykmip.endpoints[pykmip_role]),
            ])

        rgw_cmd.extend([
            '--foreground',
            run.Raw('|'),
            'sudo',
            'tee',
            '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(
                client_with_cluster=client_with_cluster),
            run.Raw('2>&1'),
        ])

        if client_config.get('valgrind'):
            cmd_prefix = get_valgrind_args(
                testdir,
                client_with_cluster,
                cmd_prefix,
                client_config.get('valgrind'),
                # see https://github.com/ceph/teuthology/pull/1600
                exit_on_first_error=False)

        run_cmd = list(cmd_prefix)
        run_cmd.extend(rgw_cmd)

        ctx.daemons.add_daemon(
            remote,
            'rgw',
            client_with_id,
            cluster=cluster_name,
            fsid=ctx.ceph[cluster_name].fsid,
            args=run_cmd,
            logger=log.getChild(client),
            stdin=run.PIPE,
            wait=False,
        )

    # XXX: add_daemon() doesn't let us wait until radosgw finishes startup
    for client in clients:
        endpoint = ctx.rgw.role_endpoints[client]
        url = endpoint.url()
        log.info(
            'Polling {client} until it starts accepting connections on {url}'.
            format(client=client, url=url))
        (remote, ) = ctx.cluster.only(client).remotes.keys()
        wait_for_radosgw(url, remote)

    try:
        yield
    finally:
        for client in clients:
            cluster_name, daemon_type, client_id = teuthology.split_role(
                client)
            client_with_id = daemon_type + '.' + client_id
            client_with_cluster = cluster_name + '.' + client_with_id
            ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop()
            ctx.cluster.only(client).run(args=[
                'rm',
                '-f',
                '{tdir}/rgw.opslog.{client}.sock'.format(
                    tdir=testdir, client=client_with_cluster),
            ], )
            ctx.cluster.only(client).run(args=['rm', '-f', token_path])
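
A recurring detail in start_rgw() is how the frontends string is assembled: the configured frontend name, an optional prefix, and then either a TLS or a plain-port specification depending on whether the endpoint carries a certificate (civetweb marks TLS ports with a trailing 's', other frontends use ssl_port). That assembly in isolation; Endpoint is a stand-in for the real role endpoint object.

# Sketch of the frontends-string assembly from Example #6. `Endpoint` stands
# in for the teuthology role endpoint; `cert` holds a certificate path or None.
from collections import namedtuple

Endpoint = namedtuple('Endpoint', ['port', 'cert'])

def build_frontends(frontend, endpoint, prefix=None):
    frontends = frontend
    if prefix:
        frontends += ' prefix={pfx}'.format(pfx=prefix)
    if endpoint.cert:
        frontends += ' ssl_certificate={}'.format(endpoint.cert)
        if frontend == 'civetweb':
            frontends += ' port={}s'.format(endpoint.port)
        else:
            frontends += ' ssl_port={}'.format(endpoint.port)
    else:
        frontends += ' port={}'.format(endpoint.port)
    return frontends

assert build_frontends('beast', Endpoint(80, None)) == 'beast port=80'
assert build_frontends('beast', Endpoint(443, '/etc/ssl/rgw.pem')) == \
    'beast ssl_certificate=/etc/ssl/rgw.pem ssl_port=443'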