Example #1
0
def setup_dnsmasq(client, name):
    """
    Setup simple dnsmasq name eg: s3.ceph.com
    Local RGW host can then be used with whatever name has been setup with.
    """
    resolv_conf = "nameserver 127.0.0.1\n"
    dnsmasq_template = """address=/{name}/{ip_address}
server=8.8.8.8
server=8.8.4.4
""".format(name=name, ip_address=client.ip_address)
    dnsmasq_config_path = '/etc/dnsmasq.d/ceph'
    # point resolv.conf to local dnsmasq
    misc.sudo_write_file(
        remote=client,
        path='/etc/resolv.conf',
        data=resolv_conf,
    )
    misc.sudo_write_file(
        remote=client,
        path=dnsmasq_config_path,
        data=dnsmasq_template,
    )
    client.run(args=['cat', dnsmasq_config_path])
    # restart dnsmasq
    client.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
    client.run(args=['sudo', 'systemctl', 'status', 'dnsmasq'])
    time.sleep(5)
    # verify dns name is set
    client.run(args=['ping', '-c', '4', name])
Example #2
0
def set_libvirt_secret(devstack_node, ceph_node):
    log.info("Setting libvirt secret...")

    cinder_key_stringio = StringIO()
    ceph_node.run(args=["ceph", "auth", "get-key", "client.cinder"], stdout=cinder_key_stringio)
    cinder_key = cinder_key_stringio.getvalue().strip()

    uuid_stringio = StringIO()
    devstack_node.run(args=["uuidgen"], stdout=uuid_stringio)
    uuid = uuid_stringio.getvalue().strip()

    secret_path = "/tmp/secret.xml"
    secret_template = textwrap.dedent(
        """
    <secret ephemeral='no' private='no'>
        <uuid>{uuid}</uuid>
        <usage type='ceph'>
            <name>client.cinder secret</name>
        </usage>
    </secret>"""
    )
    misc.sudo_write_file(devstack_node, secret_path, secret_template.format(uuid=uuid))
    devstack_node.run(args=["sudo", "virsh", "secret-define", "--file", secret_path])
    devstack_node.run(args=["sudo", "virsh", "secret-set-value", "--secret", uuid, "--base64", cinder_key])
    return uuid
Example #3
0
def set_libvirt_secret(devstack_node, ceph_node):
    log.info("Setting libvirt secret...")

    cinder_key_stringio = StringIO()
    ceph_node.run(args=['sudo', 'ceph', 'auth', 'get-key', 'client.cinder'],
                  stdout=cinder_key_stringio)
    cinder_key = cinder_key_stringio.getvalue().strip()

    uuid_stringio = StringIO()
    devstack_node.run(args=['uuidgen'], stdout=uuid_stringio)
    uuid = uuid_stringio.getvalue().strip()

    secret_path = '/tmp/secret.xml'
    secret_template = textwrap.dedent("""
    <secret ephemeral='no' private='no'>
        <uuid>{uuid}</uuid>
        <usage type='ceph'>
            <name>client.cinder secret</name>
        </usage>
    </secret>""")
    misc.sudo_write_file(devstack_node, secret_path,
                         secret_template.format(uuid=uuid))
    devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file',
                            secret_path])
    devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret',
                            uuid, '--base64', cinder_key])
    return uuid
Example #4
0
 def copy_key(from_remote, key_name, to_remote, dest_path, owner):
     key_stringio = StringIO()
     from_remote.run(
         args=['sudo', 'ceph', 'auth', 'get-or-create', key_name],
         stdout=key_stringio)
     key_stringio.seek(0)
     misc.sudo_write_file(to_remote, dest_path,
                          key_stringio, owner=owner)
Example #5
0
def ship_utilities(ctx, config):
    assert config is None
    testdir = teuthology.get_testdir(ctx)
    filenames = []

    log.info('Shipping valgrind.supp...')
    with file(os.path.join(os.path.dirname(__file__), 'valgrind.supp'), 'rb') as f:
        fn = os.path.join(testdir, 'valgrind.supp')
        filenames.append(fn)
        for rem in ctx.cluster.remotes.iterkeys():
            teuthology.sudo_write_file(
                remote=rem,
                path=fn,
                data=f,
                )
            f.seek(0)

    FILES = ['daemon-helper', 'adjust-ulimits', 'kcon_most']
    destdir = '/usr/bin'
    for filename in FILES:
        log.info('Shipping %r...', filename)
        src = os.path.join(os.path.dirname(__file__), filename)
        dst = os.path.join(destdir, filename)
        filenames.append(dst)
        with file(src, 'rb') as f:
            for rem in ctx.cluster.remotes.iterkeys():
                teuthology.sudo_write_file(
                    remote=rem,
                    path=dst,
                    data=f,
                )
                f.seek(0)
                rem.run(
                    args=[
                        'sudo',
                        'chmod',
                        'a=rx',
                        '--',
                        dst,
                    ],
                )

    try:
        yield
    finally:
        log.info('Removing shipped files: %s...', ' '.join(filenames))
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-f',
                    '--',
                ] + list(filenames),
                wait=False,
            ),
        )
Example #6
0
def set_apache_servername(node):
    # Apache complains: "Could not reliably determine the server's fully
    # qualified domain name, using 127.0.0.1 for ServerName"
    # So, let's make sure it knows its name.
    log.info("Setting Apache ServerName...")

    hostname = node.hostname
    config_file = "/etc/apache2/conf.d/servername"
    misc.sudo_write_file(node, config_file, "ServerName {name}".format(name=hostname))
Example #7
0
def install_repo(remote, reposerver, pkgdir, username=None, password=None):
    """
    Install a package repo for reposerver on remote.
    URL will be http if username and password are none, otherwise https.
    pkgdir is the piece path between "reposerver" and "deb" or "rpm"
     (say, 'packages', or 'packages-staging/my-branch', for example).
    so:
        http[s]://[<username>:<password>@]<reposerver>/<pkgdir>/{deb|rpm}
    will be written to deb's apt inktank.list or rpm's inktank.repo
    """

    relmap = _get_relmap(remote)
    log.info('Installing repo on %s', remote)
    if username is None or password is None:
        repo_uri = 'http://{reposerver}/{pkgdir}'
    else:
        repo_uri = 'https://{username}:{password}@{reposerver}/{pkgdir}'

    if relmap['flavor'] == 'deb':
        contents = 'deb ' + repo_uri + '/deb {codename} main'
        contents = contents.format(username=username, password=password,
                                   reposerver=reposerver, pkgdir=pkgdir,
                                   codename=relmap['version'],)
        teuthology.sudo_write_file(remote,
                                   '/etc/apt/sources.list.d/inktank.list',
                                   contents)
        remote.run(args=['sudo',
                         'apt-get',
                         'install',
                         'apt-transport-https',
                         '-y'])
        result = remote.run(args=['sudo', 'apt-get', 'update', '-y'],
                            stdout=StringIO())
        return result

    elif relmap['flavor'] == 'rpm':
        baseurl = repo_uri + '/rpm/{release}{version}'
        contents = textwrap.dedent('''
            [inktank]
            name=Inktank Storage, Inc.
            baseurl={baseurl}
            gpgcheck=1
            enabled=1
            '''.format(baseurl=baseurl))
        contents = contents.format(username=username,
                                   password=password,
                                   pkgdir=pkgdir,
                                   release=relmap['release'],
                                   version=relmap['version'])
        teuthology.sudo_write_file(remote,
                                   '/etc/yum.repos.d/inktank.repo',
                                   contents)
        return remote.run(args=['sudo', 'yum', 'makecache'])

    else:
        return False
Example #8
0
def install_distro_kernel(remote):
    """
    RPM: Find newest kernel on the machine and update grub to use kernel + reboot.
    DEB: Find newest kernel. Parse grub.cfg to figure out the entryname/subentry.
    then modify 01_ceph_kernel to have correct entry + updategrub + reboot.
    """
    system_type = teuthology.get_system_type(remote)
    distribution = ''
    if system_type == 'rpm':
        output, err_mess = StringIO(), StringIO()
        remote.run(args=['rpm', '-q', 'kernel', '--last' ], stdout=output, stderr=err_mess )
        newest=output.getvalue().split()[0].split('kernel-')[1]
        log.info('Distro Kernel Version: {version}'.format(version=newest))
        update_grub_rpm(remote, newest)
        remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False )
        output.close()
        err_mess.close()
        return

    if system_type == 'deb':
        distribution = teuthology.get_system_type(remote, distro=True)
        newversion = get_version_from_pkg(remote, distribution)
        if 'ubuntu' in distribution:
            grub2conf = teuthology.get_file(remote, '/boot/grub/grub.cfg', True)
            submenu = ''
            menuentry = ''
            for line in grub2conf.split('\n'):
                if 'submenu' in line:
                    submenu = line.split('submenu ')[1]
                    # Ubuntu likes to be sneaky and change formatting of
                    # grub.cfg between quotes/doublequotes between versions
                    if submenu.startswith("'"):
                        submenu = submenu.split("'")[1]
                    if submenu.startswith('"'):
                        submenu = submenu.split('"')[1]
                if 'menuentry' in line:
                    if newversion in line and 'recovery' not in line:
                        menuentry = line.split('\'')[1]
                        break
            if submenu:
                grubvalue = submenu + '>' + menuentry
            else:
                grubvalue = menuentry
            grubfile = 'cat <<EOF\nset default="' + grubvalue + '"\nEOF'
            teuthology.delete_file(remote, '/etc/grub.d/01_ceph_kernel', sudo=True, force=True)
            teuthology.sudo_write_file(remote, '/etc/grub.d/01_ceph_kernel', StringIO(grubfile), '755')
            log.info('Distro Kernel Version: {version}'.format(version=newversion))
            remote.run(args=['sudo', 'update-grub'])
            remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False )
            return

        if 'debian' in distribution:
            grub2_kernel_select_generic(remote, newversion, 'deb')
            log.info('Distro Kernel Version: {version}'.format(version=newversion))
            remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False )
            return
Example #9
0
def run_rbd_map(remote, image, iodepth):
    iodepth = max(iodepth, 128)  # RBD_QUEUE_DEPTH_DEFAULT
    out = StringIO.StringIO()
    remote.run(args=['sudo', 'rbd', 'map', '-o', 'queue_depth={}'.format(iodepth), image], stdout=out)
    dev = out.getvalue().rstrip('\n')
    teuthology.sudo_write_file(
        remote,
        '/sys/block/{}/queue/nr_requests'.format(os.path.basename(dev)),
        str(iodepth))
    return dev
def fix_yum_repos(remote, distro):
    """
    For yum calamari installations, the repos.d directory should only
    contain a repo file named rhel<version-number>.repo
    """
    if distro.startswith('centos'):
        # hack alert: detour: install lttng for ceph
        # this works because epel is preinstalled on the vpms
        # this is not a generic solution
        # this is here solely to test the one-off 1.3.0 release for centos6
        remote.run(args="sudo yum -y install lttng-tools")
        cmds = [
            'sudo mkdir /etc/yum.repos.d.old'.split(),
            ['sudo', 'cp', run.Raw('/etc/yum.repos.d/*'),
             '/etc/yum.repos.d.old'],
            ['sudo', 'rm', run.Raw('/etc/yum.repos.d/epel*')],
        ]
        for cmd in cmds:
            if remote.run(args=cmd).exitstatus:
                return False
    else:
        cmds = [
            'sudo mv /etc/yum.repos.d /etc/yum.repos.d.old'.split(),
            'sudo mkdir /etc/yum.repos.d'.split(),
        ]
        for cmd in cmds:
            if remote.run(args=cmd).exitstatus:
                return False

        # map "distroversion" from Remote.os to a tuple of
        # (repo title, repo name descriptor, apt-mirror repo path chunk)
        yum_repo_params = {
            'rhel 6.4': ('rhel6-server', 'RHEL', 'rhel6repo-server'),
            'rhel 6.5': ('rhel6-server', 'RHEL', 'rhel6repo-server'),
            'rhel 7.0': ('rhel7-server', 'RHEL', 'rhel7repo/server'),
        }
        repotitle, reponame, path = yum_repo_params[distro]
        repopath = '/etc/yum.repos.d/%s.repo' % repotitle
        # TO DO:  Make this data configurable too
        repo_contents = '\n'.join(
            ('[%s]' % repotitle,
             'name=%s $releasever - $basearch' % reponame,
             'baseurl=http://apt-mirror.front.sepia.ceph.com/' + path,
             'gpgcheck=0',
             'enabled=1')
        )
        misc.sudo_write_file(remote, repopath, repo_contents)
    cmds = [
        'sudo yum clean all'.split(),
        'sudo yum makecache'.split(),
    ]
    for cmd in cmds:
        if remote.run(args=cmd).exitstatus:
            return False
    return True
Example #11
0
def _get_local_dir(config, remote):
    """
    Extract local directory name from the task lists.
    Copy files over to the remote site.
    """
    ldir = config.get("local", None)
    if ldir:
        remote.run(args=["sudo", "mkdir", "-p", ldir])
        for fyle in os.listdir(ldir):
            fname = "%s/%s" % (ldir, fyle)
            teuthology.sudo_write_file(remote, fname, open(fname).read(), "644")
    return ldir
Example #12
0
def _get_local_dir(config, remote):
    """
    Extract local directory name from the task lists.
    Copy files over to the remote site.
    """
    ldir = config.get('local', None)
    if ldir:
        remote.run(args=['sudo', 'mkdir', '-p', ldir,])
        for fyle in os.listdir(ldir):
            fname = "%s/%s" % (ldir, fyle)
            teuthology.sudo_write_file(remote, fname, open(fname).read(), '644')
    return ldir
Example #13
0
def _disable_default_nginx(remote):
    """
    Fix up nginx values
    """
    script = textwrap.dedent('''
        if [ -f /etc/nginx/conf.d/default.conf ]; then
            mv /etc/nginx/conf.d/default.conf \
                /etc/nginx/conf.d/default.disabled
        fi
        if [ -f /etc/nginx/sites-enabled/default ] ; then
            rm /etc/nginx/sites-enabled/default
        fi
        service nginx restart
        service {service} restart
    ''')
    service = pkg.get_service_name('httpd', remote)
    script = script.format(service=service)
    teuthology.sudo_write_file(remote, '/tmp/disable.nginx', script)
    return remote.run(args=['sudo', 'bash', '/tmp/disable.nginx'],
                      stdout=StringIO())
Example #14
0
def setup_dnsmasq(remote, cnames):
    """ configure dnsmasq on the given remote, adding each cname given """
    log.info('Configuring dnsmasq on remote %s..', remote.name)

    # back up existing resolv.conf
    resolv_conf = misc.get_file(remote, '/etc/resolv.conf')
    # point resolv.conf to local dnsmasq
    misc.sudo_write_file(remote, '/etc/resolv.conf',
                         "nameserver 127.0.0.1\n")

    # add address entries to /etc/dnsmasq.d/ceph
    dnsmasq = "server=8.8.8.8\nserver=8.8.4.4\n"
    address_template = "address=/{cname}/{ip_address}\n"
    for cname, ip_address in cnames.iteritems():
        dnsmasq += address_template.format(cname=cname, ip_address=ip_address)
    misc.sudo_write_file(remote, '/etc/dnsmasq.d/ceph', dnsmasq)

    remote.run(args=['cat', '/etc/dnsmasq.d/ceph'])
    # restart dnsmasq
    remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
    remote.run(args=['sudo', 'systemctl', 'status', 'dnsmasq'])
    # verify dns name is set
    remote.run(args=['ping', '-c', '4', cnames.keys()[0]])

    yield

    log.info('Removing dnsmasq configuration from remote %s..', remote.name)
    # restore resolv.conf
    misc.sudo_write_file(remote, '/etc/resolv.conf', resolv_conf)
    # restart dnsmasq
    remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
Example #15
0
def ship_utilities(ctx, config):
    """
    Write a copy of valgrind.supp to each of the remote sites.  Set executables used
    by Ceph in /usr/local/bin.  When finished (upon exit of the teuthology run), remove
    these files.

    :param ctx: Context
    :param config: Configuration
    """
    assert config is None
    testdir = teuthology.get_testdir(ctx)
    filenames = []

    log.info("Shipping valgrind.supp...")
    with file(os.path.join(os.path.dirname(__file__), "valgrind.supp"), "rb") as f:
        fn = os.path.join(testdir, "valgrind.supp")
        filenames.append(fn)
        for rem in ctx.cluster.remotes.iterkeys():
            teuthology.sudo_write_file(remote=rem, path=fn, data=f)
            f.seek(0)

    FILES = ["daemon-helper", "adjust-ulimits"]
    destdir = "/usr/bin"
    for filename in FILES:
        log.info("Shipping %r...", filename)
        src = os.path.join(os.path.dirname(__file__), filename)
        dst = os.path.join(destdir, filename)
        filenames.append(dst)
        with file(src, "rb") as f:
            for rem in ctx.cluster.remotes.iterkeys():
                teuthology.sudo_write_file(remote=rem, path=dst, data=f)
                f.seek(0)
                rem.run(args=["sudo", "chmod", "a=rx", "--", dst])

    try:
        yield
    finally:
        log.info("Removing shipped files: %s...", " ".join(filenames))
        run.wait(ctx.cluster.run(args=["sudo", "rm", "-f", "--"] + list(filenames), wait=False))
Example #16
0
def update_grub_rpm(remote, newversion):
    """
    Updates grub file to boot new kernel version on both legacy grub/grub2.
    """
    grub='grub2'
    # Check if grub2 is isntalled
    try:
        remote.run(args=['sudo', 'rpm', '-qi', 'grub2'])
    except Exception:
        grub = 'legacy'
    log.info('Updating Grub Version: {grub}'.format(grub=grub))
    if grub == 'legacy':
        data = ''
        #Write new legacy grub entry.
        newgrub = generate_legacy_grub_entry(remote, newversion)
        for line in newgrub:
            data += line + '\n'
        temp_file_path = remote.mktemp()
        teuthology.sudo_write_file(remote, temp_file_path, StringIO(data), '755')
        teuthology.move_file(remote, temp_file_path, '/boot/grub/grub.conf', True)
    else:
        #Update grub menu entry to new version.
        grub2_kernel_select_generic(remote, newversion, 'rpm')
Example #17
0
    def _setup_files(self, return_path_to_files=False, path_prefix='./'):
        dirname = 'dir1'
        regfilename = 'regfile'
        hlinkname = 'hlink'
        slinkname = 'slink1'
        slink2name = 'slink2'

        dir_abspath = path.join(self.mount_a.mountpoint, dirname)
        regfile_abspath = path.join(self.mount_a.mountpoint, regfilename)
        hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname)
        slink_abspath = path.join(self.mount_a.mountpoint, slinkname)
        slink2_abspath = path.join(self.mount_a.mountpoint, slink2name)

        self.mount_a.run_shell('mkdir ' + dir_abspath)
        self.mount_a.run_shell('touch ' + regfile_abspath)
        self.mount_a.run_shell(['ln', regfile_abspath, hlink_abspath])
        self.mount_a.run_shell(['ln', '-s', regfile_abspath, slink_abspath])
        self.mount_a.run_shell(['ln', '-s', dir_abspath, slink2_abspath])

        dir2_name = 'dir2'
        dir21_name = 'dir21'
        regfile121_name = 'regfile121'
        dir2_abspath = path.join(self.mount_a.mountpoint, dir2_name)
        dir21_abspath = path.join(dir2_abspath, dir21_name)
        regfile121_abspath = path.join(dir21_abspath, regfile121_name)
        self.mount_a.run_shell('mkdir -p ' + dir21_abspath)
        self.mount_a.run_shell('touch ' + regfile121_abspath)

        sudo_write_file(self.mount_a.client_remote, regfile_abspath,
                        'somedata')
        sudo_write_file(self.mount_a.client_remote, regfile121_abspath,
                        'somemoredata')

        # TODO: is there a way to trigger/force update ceph.dir.rbytes?
        # wait so that attr ceph.dir.rbytes gets a chance to be updated.
        sleep(20)

        expected_patterns = []
        path_to_files = []

        def append_expected_output_pattern(f):
            if f == '/':
                expected_patterns.append(r'{}{}{}'.format(size, " +", '.' + f))
            else:
                expected_patterns.append(r'{}{}{}'.format(
                    size, " +",
                    path_prefix + path.relpath(f, self.mount_a.mountpoint)))

        for f in [
                dir_abspath, regfile_abspath, regfile121_abspath,
                hlink_abspath, slink_abspath, slink2_abspath
        ]:
            size = humansize(
                self.mount_a.stat(f, follow_symlinks=False)['st_size'])
            append_expected_output_pattern(f)

        # get size for directories containig regfiles within
        for f in [dir2_abspath, dir21_abspath]:
            size = humansize(
                self.mount_a.stat(regfile121_abspath,
                                  follow_symlinks=False)['st_size'])
            append_expected_output_pattern(f)

        # get size for CephFS root
        size = 0
        for f in [
                regfile_abspath, regfile121_abspath, slink_abspath,
                slink2_abspath
        ]:
            size += self.mount_a.stat(f, follow_symlinks=False)['st_size']
        size = humansize(size)
        append_expected_output_pattern('/')

        if return_path_to_files:
            for p in [
                    dir_abspath, regfile_abspath, dir2_abspath, dir21_abspath,
                    regfile121_abspath, hlink_abspath, slink_abspath,
                    slink2_abspath
            ]:
                path_to_files.append(path.relpath(p, self.mount_a.mountpoint))

            return (expected_patterns, path_to_files)
        else:
            return expected_patterns
Example #18
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                if var == 'testing':
                    ceph_branch = '--{var}'.format(var=var)
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new'+" "+mon_nodes
        install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
        purge_nodes = './ceph-deploy purge'+" "+all_nodes
        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
        deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote,) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines,
                                                    sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while (estatus_gather != 0):
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = './ceph-deploy osd create --zap-disk '
        for d in node_dev_list:
            if config.get('dmcrypt') is not None:
                osd_create_cmd_d = osd_create_cmd+'--dmcrypt'+" "+d
            else:
                osd_create_cmd_d = osd_create_cmd+d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(':')
                dev_disk = disks[0]+":"+disks[1]
                j_disk = disks[0]+":"+disks[2]
                zap_disk = './ceph-deploy disk zap '+dev_disk+" "+j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
                )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
                )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                            ],
                        )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                        )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop' ])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'),
                              'sudo', 'service',  'ceph', 'status'],
                              check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                        ],
                    wait=False,
                    ),
                )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge'+" "+all_nodes
        purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #19
0
def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir):
    """
    Create user with access_key and secret_key that will be
    used for the s3a testdir
    """
    client.run(
        args=[
            'sudo',
            'radosgw-admin',
            'user',
            'create',
            run.Raw('--uid'),
            's3a',
            run.Raw('--display-name=s3a cephtests'),
            run.Raw('--access-key={access_key}'.format(access_key=access_key)),
            run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)),
            run.Raw('[email protected]'),
        ]
    )
    client.run(
        args=[
            'virtualenv',
            '{testdir}/venv'.format(testdir=testdir),
            run.Raw('&&'),
            run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)),
            'install',
            'boto'
        ]
    )
    create_bucket = """
#!/usr/bin/env python
import boto
import boto.s3.connection
access_key = '{access_key}'
secret_key = '{secret_key}'

conn = boto.connect_s3(
        aws_access_key_id = access_key,
        aws_secret_access_key = secret_key,
        host = '{dns_name}',
        is_secure=False,
        calling_format = boto.s3.connection.OrdinaryCallingFormat(),
        )
bucket = conn.create_bucket('{bucket_name}')
for bucket in conn.get_all_buckets():
        print bucket.name + "\t" + bucket.creation_date
""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name)
    py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir)
    misc.sudo_write_file(
        remote=client,
        path=py_bucket_file,
        data=create_bucket,
        perms='0744',
        )
    client.run(
        args=[
            'cat',
            '{testdir}/create_bucket.py'.format(testdir=testdir),
        ]
    )
    client.run(
        args=[
            '{testdir}/venv/bin/python'.format(testdir=testdir),
            '{testdir}/create_bucket.py'.format(testdir=testdir),
        ]
    )
Example #20
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    try:
        log.info("Building ceph cluster using ceph-deploy...")
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get("branch") is not None:
            cbranch = config.get("branch")
            for var, val in cbranch.iteritems():
                if var == "testing":
                    ceph_branch = "--{var}".format(var=var)
                ceph_branch = "--{var}={val}".format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, "mds")
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, "mon")
        mon_nodes = " ".join(mon_node)
        new_mon = "./ceph-deploy new" + " " + mon_nodes
        install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes
        mon_hostname = mon_nodes.split(" ")[0]
        mon_hostname = str(mon_hostname)
        gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
        deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info("adding config inputs...")
        testdir = teuthology.get_testdir(ctx)
        conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote,) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get("conf") is not None:
            confp = config.get("conf")
            for section, keys in confp.iteritems():
                lines = "[{section}]\n".format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = "{key} = {value}\n".format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_create_nodes = "./ceph-deploy mon create-initial"
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = "./ceph-deploy osd create --zap-disk "
        for d in node_dev_list:
            if config.get("dmcrypt") is not None:
                osd_create_cmd_d = osd_create_cmd + "--dmcrypt" + " " + d
            else:
                osd_create_cmd_d = osd_create_cmd + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(":")
                dev_disk = disks[0] + ":" + disks[1]
                j_disk = disks[0] + ":" + disks[2]
                zap_disk = "./ceph-deploy disk zap " + dev_disk + " " + j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info("successfully created osd")
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote, path=admin_keyring_path, sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=[
                            "cd",
                            "{tdir}".format(tdir=testdir),
                            run.Raw("&&"),
                            "sudo",
                            "bash",
                            "-c",
                            run.Raw('"'),
                            "ceph",
                            "auth",
                            "get-or-create",
                            "client.{id}".format(id=id_),
                            "mds",
                            "allow",
                            "mon",
                            "allow *",
                            "osd",
                            "allow *",
                            run.Raw(">"),
                            client_keyring,
                            run.Raw('"'),
                        ]
                    )
                    key_data = teuthology.get_file(remote=mon0_remote, path=client_keyring, sudo=True)
                    teuthology.sudo_write_file(remote=remot, path=client_keyring, data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=admin_keyring_path, data=admin_keyring, perms="0644")
                    teuthology.sudo_write_file(remote=remot, path=conf_path, data=conf_data, perms="0644")
        else:
            raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "stop"])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=["sudo", "status", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "status"], check_status=False
        )

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(
            args=["sudo", "ps", "aux", run.Raw("|"), "grep", "-v", "grep", run.Raw("|"), "grep", "ceph"],
            check_status=False,
        )

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(remote, "/var/lib/ceph/mon", path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo",
                        "find",
                        "/var/log/ceph",
                        "-name",
                        "*.log",
                        "-print0",
                        run.Raw("|"),
                        "sudo",
                        "xargs",
                        "-0",
                        "--no-run-if-empty",
                        "--",
                        "gzip",
                        "--",
                    ],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph", os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #21
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""
    log.info("Building ceph cluster using ceph-deploy...")
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get("branch") is not None:
        cbranch = config.get("branch")
        for var, val in cbranch.iteritems():
            if var == "testing":
                ceph_branch = "--{var}".format(var=var)
            ceph_branch = "--{var}={val}".format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, "mds")
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, "mon")
    mon_nodes = " ".join(mon_node)
    new_mon = "./ceph-deploy new" + " " + mon_nodes
    install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes
    purge_nodes = "./ceph-deploy purge" + " " + all_nodes
    purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes
    mon_hostname = mon_nodes.split(" ")[0]
    mon_hostname = str(mon_hostname)
    gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
    deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info("adding config inputs...")
    testdir = teuthology.get_testdir(ctx)
    conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get("conf") is not None:
        confp = config.get("conf")
        for section, keys in confp.iteritems():
            lines = "[{section}]\n".format(section=section)
            teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = "{key} = {value}\n".format(key=key, value=value)
                teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get("mon_initial_members")
    if mon_no is not None:
        i = 0
        mon1 = []
        while i < mon_no:
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = "./ceph-deploy mon create" + " " + initial_mons + " " + mon_node[k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = "./ceph-deploy mon create" + " " + mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while estatus_gather != 0:
        # mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        # execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get("test_mon_destroy") is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = "./ceph-deploy osd create --zap-disk" + " " + d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info("successfully created osd")
            no_of_osds += 1
        else:
            zap_disk = "./ceph-deploy disk zap" + " " + d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get("wait-for-healthy", True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info("Setting up client nodes...")
        conf_path = "/etc/ceph/ceph.conf"
        admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path, sudo=True)
        admin_keyring = teuthology.get_file(remote=mon0_remote, path=admin_keyring_path, sudo=True)

        clients = ctx.cluster.only(teuthology.is_type("client"))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                mon0_remote.run(
                    args=[
                        "cd",
                        "{tdir}".format(tdir=testdir),
                        run.Raw("&&"),
                        "sudo",
                        "bash",
                        "-c",
                        run.Raw('"'),
                        "ceph",
                        "auth",
                        "get-or-create",
                        "client.{id}".format(id=id_),
                        "mds",
                        "allow",
                        "mon",
                        "allow *",
                        "osd",
                        "allow *",
                        run.Raw(">"),
                        client_keyring,
                        run.Raw('"'),
                    ]
                )
                key_data = teuthology.get_file(remote=mon0_remote, path=client_keyring, sudo=True)
                teuthology.sudo_write_file(remote=remot, path=client_keyring, data=key_data, perms="0644")
                teuthology.sudo_write_file(remote=remot, path=admin_keyring_path, data=admin_keyring, perms="0644")
                teuthology.sudo_write_file(remote=remot, path=conf_path, data=conf_data, perms="0644")
    else:
        raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")

    try:
        yield

    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"), "sudo", "service", "ceph", "stop"])

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(remote, "/var/lib/ceph/mon", path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo",
                        "find",
                        "/var/log/ceph",
                        "-name",
                        "*.log",
                        "-print0",
                        run.Raw("|"),
                        "sudo",
                        "xargs",
                        "-0",
                        "--no-run-if-empty",
                        "--",
                        "gzip",
                        "--",
                    ],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph", os.path.join(sub, "log"))

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #22
0
def distribute_ceph_conf(devstack_node, ceph_node):
    log.info("Copying ceph.conf to DevStack node...")

    ceph_conf_path = "/etc/ceph/ceph.conf"
    ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True)
    misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf)
Example #23
0
def task(ctx, config):
    """
    Setup samba smbd with ceph vfs module.  This task assumes the samba
    package has already been installed via the install task.

    The config is optional and defaults to starting samba on all nodes.
    If a config is given, it is expected to be a list of
    samba nodes to start smbd servers on.

    Example that starts smbd on all samba nodes::

        tasks:
        - install:
        - install:
            project: samba
            extra_packages: ['samba']
        - ceph:
        - samba:
        - interactive:

    Example that starts smbd on just one of the samba nodes and cifs on the other::

        tasks:
        - samba: [samba.0]
        - cifs: [samba.1]

    An optional backend can be specified, and requires a path which smbd will
    use as the backend storage location:

        roles:
            - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a]
            - [client.0, samba.0]

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - samba:
            samba.0:
              cephfuse: "{testdir}/mnt.0"

    This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with
    a UNC of //localhost/cephfuse.  Access through that UNC will be on
    the ceph fuse mount point.

    If no arguments are specified in the samba
    role, the default behavior is to enable the ceph UNC //localhost/ceph
    and use the ceph vfs module as the smbd backend.

    """
    log.info("Setting up smbd with ceph vfs...")
    assert config is None or isinstance(config, list) or isinstance(config, dict), \
        "task samba got invalid config"

    if config is None:
        config = dict(('samba.{id}'.format(id=id_), None)
                  for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba'))
    elif isinstance(config, list):
        config = dict((name, None) for name in config)

    samba_servers = list(get_sambas(ctx=ctx, roles=config.keys()))

    testdir = teuthology.get_testdir(ctx)

    from teuthology.task.ceph import CephState
    if not hasattr(ctx, 'daemons'):
        ctx.daemons = CephState()

    for id_, remote in samba_servers:

        rolestr = "samba.{id_}".format(id_=id_)

        confextras = """vfs objects = ceph
  ceph:config_file = /etc/ceph/ceph.conf"""

        unc = "ceph"
        backend = "/"

        if config[rolestr] is not None:
            # verify that there's just one parameter in role
            if len(config[rolestr]) != 1:
                log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_))
                raise Exception('invalid config')
            confextras = ""
            (unc, backendstr) = config[rolestr].items()[0]
            backend = backendstr.format(testdir=testdir)

        # on first samba role, set ownership and permissions of ceph root
        # so that samba tests succeed
        if config[rolestr] is None and id_ == samba_servers[0][0]:
            remote.run(
                    args=[
                        'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'),
                        'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'),
                        'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'),
                        'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'),
                        'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'),
                        'rm', '-rf', '/tmp/cmnt',
                        ],
                    )
        else:
            remote.run(
                    args=[
                        'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'),
                        'sudo', 'chmod', '1777', backend,
                        ],
                    )

        teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """
[global]
  workgroup = WORKGROUP
  netbios name = DOMAIN

[{unc}]
  path = {backend}
  {extras}
  writeable = yes
  valid users = ubuntu
""".format(extras=confextras, unc=unc, backend=backend))

        # create ubuntu user
        remote.run(
            args=[
                'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu',
                run.Raw('||'),
                'printf', run.Raw('"ubuntu\nubuntu\n"'),
                run.Raw('|'),
                'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu'
            ])

        smbd_cmd = [
                'sudo',
                'daemon-helper',
                'kill',
                'nostdin',
                '/usr/local/samba/sbin/smbd',
                '-F',
                ]
        ctx.daemons.add_daemon(remote, 'smbd', id_,
                               args=smbd_cmd,
                               logger=log.getChild("smbd.{id_}".format(id_=id_)),
                               stdin=run.PIPE,
                               wait=False,
                               )

        # let smbd initialize, probably a better way...
        import time
        time.sleep(5)
    try:
        yield
    finally:
        log.info('Stopping smbd processes...')
        exc_info = (None, None, None)
        for d in ctx.daemons.iter_daemons_of_role('smbd'):
            try:
                d.stop()
            except (run.CommandFailedError,
                    run.CommandCrashedError,
                    run.ConnectionLostError):
                exc_info = sys.exc_info()
                log.exception('Saw exception from %s.%s', d.role, d.id_)
        if exc_info != (None, None, None):
            raise exc_info[0], exc_info[1], exc_info[2]

        for id_, remote in samba_servers:
            remote.run(
                args=[
                    'sudo',
                    'rm', '-rf',
                    '/usr/local/samba/etc/smb.conf',
                    '/usr/local/samba/private/*',
                    '/usr/local/samba/var/run/',
                    '/usr/local/samba/var/locks',
                    '/usr/local/samba/var/lock',
                    ],
                )
            # make sure daemons are gone
            try:
                remote.run(
                    args=[
                        'while',
                        'sudo', 'killall', '-9', 'smbd',
                        run.Raw(';'),
                        'do', 'sleep', '1',
                        run.Raw(';'),
                        'done',
                        ],
                    )

                remote.run(
                    args=[
                        'sudo',
                        'lsof',
                        backend,
                        ],
                    )
                remote.run(
                    args=[
                        'sudo',
                        'fuser',
                        '-M',
                        backend,
                        ],
                    )
            except Exception:
                log.exception("Saw exception")
                pass
Example #24
0
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)
        execute_ceph_deploy(mgr_create)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)
        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                ceph_fs = Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
Example #25
0
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to tht test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occured, extract a failure message and store in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install', '-d', '-m0755', '--',
                '{tdir}/data'.format(tdir=testdir),
                ],
            wait=False,
            )
        )

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install', '-d', '-m0777', '--', '/var/run/ceph',
                ],
            wait=False,
            )
        )


    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals


    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--create-keyring',
            keyring_path,
            ],
        )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=mon.',
            keyring_path,
            ],
        )
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'chmod',
            '0644',
            keyring_path,
            ],
        )
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
        )
    if not 'global' in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    log.info('Writing ceph.conf for FSID %s...' % fsid)
    conf_path = config.get('conf_path', DEFAULT_CONF_PATH)
    write_conf(ctx, conf_path)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(
        args=[
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph-authtool',
            '--gen-key',
            '--name=client.admin',
            '--set-uid=0',
            '--cap', 'mon', 'allow *',
            '--cap', 'osd', 'allow *',
            '--cap', 'mds', 'allow *',
            keyring_path,
            ],
        )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
        )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='{tdir}/monmap'.format(tdir=testdir),
        )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(
            remote=rem,
            path=keyring_path,
            data=keyring,
            perms='0644'
            )
        teuthology.write_file(
            remote=rem,
            path='{tdir}/monmap'.format(tdir=testdir),
            data=monmap,
            )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c', conf_path,
                '--clobber',
                '--createsimple', '{num:d}'.format(
                    num=teuthology.num_instances_of_type(ctx.cluster, 'osd'),
                    ),
                '{tdir}/osdmap'.format(tdir=testdir),
                '--pg_bits', '2',
                '--pgp_bits', '4',
                ],
            wait=False,
            ),
        )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
                    run.Raw('&&'),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=mds.{id}'.format(id=id_),
                    '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
                    ],
                )

    cclient.create_keyring(ctx)
    log.info('Running mkfs on osd nodes...')

    ctx.disk_config = argparse.Namespace()
    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]


        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'mkdir',
                    '-p',
                    '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
                    ])
            log.info(str(roles_to_journals))
            log.info(id_)
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    #package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime','user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = ['-m', 'single',
                                        '-l', '32768',
                                        '-n', '32768']
                if fs == 'xfs':
                    #package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime','user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=[
                            'sudo',
                            'apt-get', 'install', '-y', package
                            ],
                        stdout=StringIO(),
                        )

                try:
                    remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
                except run.CommandFailedError:
                    # Newer btfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mount_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args= ['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])

                log.info('mount %s on %s -o %s' % (dev, remote,
                                                   ','.join(mount_options)))
                remote.run(
                    args=[
                        'sudo',
                        'mount',
                        '-t', fs,
                        '-o', ','.join(mount_options),
                        dev,
                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
                        ]
                    )
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][id_] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
                devs_to_clean[remote].append(
                    os.path.join(
                        os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(id=id_)),
                        )
                    )

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(
                args=[
                    'sudo',
                    'MALLOC_CHECK_=3',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-osd',
                    '--mkfs',
                    '--mkkey',
                    '-i', id_,
                    '--monmap', '{tdir}/monmap'.format(tdir=testdir),
                    ],
                )


    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds','osd']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                        ),
                    sudo=True,
                    )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    )
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo', 'tee', '-a',
            keyring_path,
            ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
        )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                        ),
                    ] + list(teuthology.generate_caps(type_)),
                wait=False,
                ),
            )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(
                args=[
                  'sudo',
                  'mkdir',
                  '-p',
                  '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
                  ],
                )
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-mon',
                    '--mkfs',
                    '-i', id_,
                    '--monmap={tdir}/monmap'.format(tdir=testdir),
                    '--osdmap={tdir}/osdmap'.format(tdir=testdir),
                    '--keyring={kpath}'.format(kpath=keyring_path),
                    ],
                )


    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '{tdir}/monmap'.format(tdir=testdir),
                '{tdir}/osdmap'.format(tdir=testdir),
                ],
            wait=False,
            ),
        )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')
        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/ceph.log',
                ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                    run.Raw('|'), 'head', '-n', '1',
                    ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
                )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                            match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(
                        args=[
                            'sync',
                            run.Raw('&&'),
                            'sudo',
                            'umount',
                            '-f',
                            dir_
                        ]
                    )
                except Exception as e:
                    remote.run(args=[
                            'sudo',
                            run.Raw('PATH=/usr/sbin:$PATH'),
                            'lsof',
                            run.Raw(';'),
                            'ps', 'auxf',
                            ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=[ 'sudo', 'umount', '-f', '/mnt' ],
                    check_status=False,
                )

        if ctx.archive is not None and \
           not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    '{tdir}/data'.format(tdir=testdir),
                    '{tdir}/monmap'.format(tdir=testdir),
                    ],
                wait=False,
                ),
            )
Example #26
0
def syslog(ctx, config):
    """
    start syslog / stop syslog on exit.
    """
    if ctx.archive is None:
        # disable this whole feature if we're not going to archive the data anyway
        yield
        return

    log.info('Starting syslog monitoring...')

    archive_dir = teuthology.get_archive_dir(ctx)
    run.wait(
        ctx.cluster.run(
            args=[
                'mkdir', '-m0755', '--',
                '{adir}/syslog'.format(adir=archive_dir),
                ],
            wait=False,
            )
        )

    CONF = '/etc/rsyslog.d/80-cephtest.conf'
    conf_fp = StringIO('''
kern.* -{adir}/syslog/kern.log;RSYSLOG_FileFormat
*.*;kern.none -{adir}/syslog/misc.log;RSYSLOG_FileFormat
'''.format(adir=archive_dir))
    try:
        for rem in ctx.cluster.remotes.iterkeys():
            teuthology.sudo_write_file(
                remote=rem,
                path=CONF,
                data=conf_fp,
                )
            conf_fp.seek(0)
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'service',
                    # a mere reload (SIGHUP) doesn't seem to make
                    # rsyslog open the files
                    'rsyslog',
                    'restart',
                    ],
                wait=False,
                ),
            )

        yield
    finally:
        log.info('Shutting down syslog monitoring...')

        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-f',
                    '--',
                    CONF,
                    run.Raw('&&'),
                    'sudo',
                    'service',
                    'rsyslog',
                    'restart',
                    ],
                wait=False,
                ),
            )
        # race condition: nothing actually says rsyslog had time to
        # flush the file fully. oh well.

        log.info('Checking logs for errors...')
        for remote in ctx.cluster.remotes.iterkeys():
            log.debug('Checking %s', remote.name)
            r = remote.run(
                args=[
                    'egrep', '--binary-files=text',
                    '\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b',
                    run.Raw('{adir}/syslog/*.log'.format(adir=archive_dir)),
                    run.Raw('|'),
                    'grep', '-v', 'task .* blocked for more than .* seconds',
                    run.Raw('|'),
                    'grep', '-v', 'lockdep is turned off',
                    run.Raw('|'),
                    'grep', '-v', 'trying to register non-static key',
                    run.Raw('|'),
                    'grep', '-v', 'DEBUG: fsize',  # xfs_fsr
                    run.Raw('|'),
                    'grep', '-v', 'CRON',  # ignore cron noise
                    run.Raw('|'),
                    'grep', '-v', 'BUG: bad unlock balance detected', # #6097
                    run.Raw('|'),
                    'grep', '-v', 'inconsistent lock state', # FIXME see #2523
                    run.Raw('|'),
                    'grep', '-v', '*** DEADLOCK ***', # part of lockdep output
                    run.Raw('|'),
                    'grep', '-v', 'INFO: possible irq lock inversion dependency detected', # FIXME see #2590 and #147
                    run.Raw('|'),
                    'grep', '-v', 'INFO: NMI handler (perf_event_nmi_handler) took too long to run',
                    run.Raw('|'),
                    'grep', '-v', 'INFO: recovery required on readonly',
                    run.Raw('|'),
                    'head', '-n', '1',
                    ],
                stdout=StringIO(),
                )
            stdout = r.stdout.getvalue()
            if stdout != '':
                log.error('Error in syslog on %s: %s', remote.name, stdout)
                ctx.summary['success'] = False
                if 'failure_reason' not in ctx.summary:
                    ctx.summary['failure_reason'] = \
                        "'{error}' in syslog".format(error=stdout)

        log.info('Compressing syslogs...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'find',
                    '{adir}/syslog'.format(adir=archive_dir),
                    '-name',
                    '*.log',
                    '-print0',
                    run.Raw('|'),
                    'sudo',
                    'xargs',
                    '-0',
                    '--no-run-if-empty',
                    '--',
                    'gzip',
                    '--',
                    ],
                wait=False,
                ),
            )
Example #27
0
def update_devstack_config_files(devstack_node, secret_uuid):
    log.info("Updating DevStack config files to use Ceph...")

    def backup_config(node, file_name, backup_ext=".orig.teuth"):
        node.run(args=["cp", "-f", file_name, file_name + backup_ext])

    def update_config(config_name, config_stream, update_dict, section="DEFAULT"):
        parser = ConfigParser()
        parser.read_file(config_stream)
        for (key, value) in update_dict.items():
            parser.set(section, key, value)
        out_stream = StringIO()
        parser.write(out_stream)
        out_stream.seek(0)
        return out_stream

    updates = [
        dict(
            name="/etc/glance/glance-api.conf",
            options=dict(
                default_store="rbd", rbd_store_user="******", rbd_store_pool="images", show_image_direct_url="True"
            ),
        ),
        dict(
            name="/etc/cinder/cinder.conf",
            options=dict(
                volume_driver="cinder.volume.drivers.rbd.RBDDriver",
                rbd_pool="volumes",
                rbd_ceph_conf="/etc/ceph/ceph.conf",
                rbd_flatten_volume_from_snapshot="false",
                rbd_max_clone_depth="5",
                glance_api_version="2",
                rbd_user="******",
                rbd_secret_uuid=secret_uuid,
                backup_driver="cinder.backup.drivers.ceph",
                backup_ceph_conf="/etc/ceph/ceph.conf",
                backup_ceph_user="******",
                backup_ceph_chunk_size="134217728",
                backup_ceph_pool="backups",
                backup_ceph_stripe_unit="0",
                backup_ceph_stripe_count="0",
                restore_discard_excess_bytes="true",
            ),
        ),
        dict(
            name="/etc/nova/nova.conf",
            options=dict(
                libvirt_images_type="rbd",
                libvirt_images_rbd_pool="volumes",
                libvirt_images_rbd_ceph_conf="/etc/ceph/ceph.conf",
                rbd_user="******",
                rbd_secret_uuid=secret_uuid,
                libvirt_inject_password="******",
                libvirt_inject_key="false",
                libvirt_inject_partition="-2",
            ),
        ),
    ]

    for update in updates:
        file_name = update["name"]
        options = update["options"]
        config_str = misc.get_file(devstack_node, file_name, sudo=True)
        config_stream = StringIO(config_str)
        backup_config(devstack_node, file_name)
        new_config_stream = update_config(file_name, config_stream, options)
        misc.sudo_write_file(devstack_node, file_name, new_config_stream)
Example #28
0
def ship_utilities(ctx, config):
    """
    Write a copy of valgrind.supp to each of the remote sites.  Set executables used
    by Ceph in /usr/local/bin.  When finished (upon exit of the teuthology run), remove
    these files.

    :param ctx: Context
    :param config: Configuration
    """
    assert config is None
    testdir = teuthology.get_testdir(ctx)
    filenames = []

    log.info('Shipping valgrind.supp...')
    with file(os.path.join(os.path.dirname(__file__), 'valgrind.supp'), 'rb') as f:
        fn = os.path.join(testdir, 'valgrind.supp')
        filenames.append(fn)
        for rem in ctx.cluster.remotes.iterkeys():
            teuthology.sudo_write_file(
                remote=rem,
                path=fn,
                data=f,
                )
            f.seek(0)

    FILES = ['daemon-helper', 'adjust-ulimits']
    destdir = '/usr/bin'
    for filename in FILES:
        log.info('Shipping %r...', filename)
        src = os.path.join(os.path.dirname(__file__), filename)
        dst = os.path.join(destdir, filename)
        filenames.append(dst)
        with file(src, 'rb') as f:
            for rem in ctx.cluster.remotes.iterkeys():
                teuthology.sudo_write_file(
                    remote=rem,
                    path=dst,
                    data=f,
                )
                f.seek(0)
                rem.run(
                    args=[
                        'sudo',
                        'chmod',
                        'a=rx',
                        '--',
                        dst,
                    ],
                )

    try:
        yield
    finally:
        log.info('Removing shipped files: %s...', ' '.join(filenames))
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-f',
                    '--',
                ] + list(filenames),
                wait=False,
            ),
        )
Example #29
0
def build_ceph_cluster(ctx, config):
    log.info('Building ceph cluster using ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get('branch') is not None:
        cbranch = config.get('branch')
        for var, val in cbranch.iteritems():
            if var == 'testing':
                ceph_branch = '--{var}'.format(var=var)
            ceph_branch = '--{var}={val}'.format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, 'mon')
    mon_nodes = " ".join(mon_node)
    new_mon = './ceph-deploy new'+" "+mon_nodes
    install_nodes = './ceph-deploy install '+ceph_branch+" "+all_nodes
    purge_nodes = './ceph-deploy purge'+" "+all_nodes
    purgedata_nodes = './ceph-deploy purgedata'+" "+all_nodes
    mon_hostname = mon_nodes.split(' ')[0]
    mon_hostname = str(mon_hostname)
    gather_keys = './ceph-deploy gatherkeys'+" "+mon_hostname
    deploy_mds = './ceph-deploy mds create'+" "+mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info('adding config inputs...')
    testdir = teuthology.get_testdir(ctx)
    conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get('mon_initial_members')
    if mon_no is not None:
        i = 0
        mon1 = []
        while(i < mon_no):
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = './ceph-deploy mon create'+" "+initial_mons+" "+mon_node[k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = './ceph-deploy mon create'+" "+mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while (estatus_gather != 0):
        #mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        #execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get('test_mon_destroy') is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = './ceph-deploy mon destroy'+" "+mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = './ceph-deploy osd create --zap-disk'+" "+d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info('successfully created osd')
            no_of_osds += 1
        else:
            zap_disk = './ceph-deploy disk zap'+" "+d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get('wait-for-healthy', True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
            )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
            )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo','bash','-c',
                        run.Raw('"'),'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                        ],
                    )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                    )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=client_keyring,
                    data=key_data,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=admin_keyring_path,
                    data=admin_keyring,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=conf_path,
                    data=conf_data,
                    perms='0644'
                )
    else:
        raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")

    try:
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
                'sudo', 'stop', 'ceph-all',
                run.Raw('||'),
                'sudo', 'service', 'ceph', 'stop'
                ])

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                        ],
                    wait=False,
                    ),
                )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
Example #30
0
def install_kernel(remote, path=None, version=None):
    """
    A bit of misnomer perhaps - the actual kernel package is installed
    elsewhere, this function deals with initrd and grub.  Currently the
    following cases are handled:
      - local, gitbuilder, distro for rpm packages
      - distro for deb packages - see TODO in install_and_reboot()

    TODO: reboots should be issued from install_and_reboot()

    :param path:    package path (for local and gitbuilder cases)
    :param version: for RPM distro kernels, pass this to update_grub_rpm
    """
    templ = "install_kernel(remote={remote}, path={path}, version={version})"
    log.debug(templ.format(remote=remote, path=path, version=version))
    package_type = remote.os.package_type
    if package_type == 'rpm':
        if path:
            version = get_image_version(remote, path)
            # This is either a gitbuilder or a local package and both of these
            # could have been built with upstream rpm targets with specs that
            # don't have a %post section at all, which means no initrd.
            maybe_generate_initrd_rpm(remote, path, version)
        elif not version or version == 'distro':
            version = get_latest_image_version_rpm(remote)
        update_grub_rpm(remote, version)
        remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False )
        return

    if package_type == 'deb':
        distribution = remote.os.name
        newversion = get_latest_image_version_deb(remote, distribution)
        if 'ubuntu' in distribution:
            grub2conf = teuthology.get_file(remote, '/boot/grub/grub.cfg', True)
            submenu = ''
            menuentry = ''
            for line in grub2conf.split('\n'):
                if 'submenu' in line:
                    submenu = line.split('submenu ')[1]
                    # Ubuntu likes to be sneaky and change formatting of
                    # grub.cfg between quotes/doublequotes between versions
                    if submenu.startswith("'"):
                        submenu = submenu.split("'")[1]
                    if submenu.startswith('"'):
                        submenu = submenu.split('"')[1]
                if 'menuentry' in line:
                    if newversion in line and 'recovery' not in line:
                        menuentry = line.split('\'')[1]
                        break
            if submenu:
                grubvalue = submenu + '>' + menuentry
            else:
                grubvalue = menuentry
            grubfile = 'cat <<EOF\nset default="' + grubvalue + '"\nEOF'
            teuthology.delete_file(remote, '/etc/grub.d/01_ceph_kernel', sudo=True, force=True)
            teuthology.sudo_write_file(remote, '/etc/grub.d/01_ceph_kernel', StringIO(grubfile), '755')
            log.info('Distro Kernel Version: {version}'.format(version=newversion))
            remote.run(args=['sudo', 'update-grub'])
            remote.run(args=['sudo', 'shutdown', '-r', 'now'], wait=False )
            return

        if 'debian' in distribution:
            grub2_kernel_select_generic(remote, newversion, 'deb')
            log.info('Distro Kernel Version: {version}'.format(version=newversion))
            remote.run( args=['sudo', 'shutdown', '-r', 'now'], wait=False )
            return
Example #31
0
def syslog(ctx, config):
    """
    start syslog / stop syslog on exit.
    """
    if ctx.archive is None:
        # disable this whole feature if we're not going to archive the data anyway
        yield
        return

    log.info("Starting syslog monitoring...")

    archive_dir = misc.get_archive_dir(ctx)
    log_dir = "{adir}/syslog".format(adir=archive_dir)
    run.wait(ctx.cluster.run(args=["mkdir", "-p", "-m0755", "--", log_dir], wait=False))

    CONF = "/etc/rsyslog.d/80-cephtest.conf"
    kern_log = "{log_dir}/kern.log".format(log_dir=log_dir)
    misc_log = "{log_dir}/misc.log".format(log_dir=log_dir)
    conf_lines = [
        "kern.* -{kern_log};RSYSLOG_FileFormat".format(kern_log=kern_log),
        "*.*;kern.none -{misc_log};RSYSLOG_FileFormat".format(misc_log=misc_log),
    ]
    conf_fp = StringIO("\n".join(conf_lines))
    try:
        for rem in ctx.cluster.remotes.iterkeys():
            log_context = "system_u:object_r:var_log_t:s0"
            for log_path in (kern_log, misc_log):
                rem.run(args="touch %s" % log_path)
                rem.chcon(log_path, log_context)
            misc.sudo_write_file(remote=rem, path=CONF, data=conf_fp)
            conf_fp.seek(0)
        run.wait(
            ctx.cluster.run(
                args=[
                    "sudo",
                    "service",
                    # a mere reload (SIGHUP) doesn't seem to make
                    # rsyslog open the files
                    "rsyslog",
                    "restart",
                ],
                wait=False,
            )
        )

        yield
    finally:
        log.info("Shutting down syslog monitoring...")

        run.wait(
            ctx.cluster.run(
                args=["sudo", "rm", "-f", "--", CONF, run.Raw("&&"), "sudo", "service", "rsyslog", "restart"],
                wait=False,
            )
        )
        # race condition: nothing actually says rsyslog had time to
        # flush the file fully. oh well.

        log.info("Checking logs for errors...")
        for rem in ctx.cluster.remotes.iterkeys():
            log.debug("Checking %s", rem.name)
            r = rem.run(
                args=[
                    "egrep",
                    "--binary-files=text",
                    "\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b",
                    run.Raw("{adir}/syslog/*.log".format(adir=archive_dir)),
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "task .* blocked for more than .* seconds",
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "lockdep is turned off",
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "trying to register non-static key",
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "DEBUG: fsize",  # xfs_fsr
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "CRON",  # ignore cron noise
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "BUG: bad unlock balance detected",  # #6097
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "inconsistent lock state",  # FIXME see #2523
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "*** DEADLOCK ***",  # part of lockdep output
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "INFO: possible irq lock inversion dependency detected",  # FIXME see #2590 and #147
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "INFO: NMI handler (perf_event_nmi_handler) took too long to run",
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "INFO: recovery required on readonly",
                    run.Raw("|"),
                    "grep",
                    "-v",
                    "ceph-create-keys: INFO",
                    run.Raw("|"),
                    "head",
                    "-n",
                    "1",
                ],
                stdout=StringIO(),
            )
            stdout = r.stdout.getvalue()
            if stdout != "":
                log.error("Error in syslog on %s: %s", rem.name, stdout)
                set_status(ctx.summary, "fail")
                if "failure_reason" not in ctx.summary:
                    ctx.summary["failure_reason"] = "'{error}' in syslog".format(error=stdout)

        log.info("Compressing syslogs...")
        run.wait(
            ctx.cluster.run(
                args=[
                    "find",
                    "{adir}/syslog".format(adir=archive_dir),
                    "-name",
                    "*.log",
                    "-print0",
                    run.Raw("|"),
                    "sudo",
                    "xargs",
                    "-0",
                    "--no-run-if-empty",
                    "--",
                    "gzip",
                    "--",
                ],
                wait=False,
            )
        )
Example #32
0
def syslog(ctx, config):
    """
    start syslog / stop syslog on exit.
    """
    if ctx.archive is None:
        # disable this whole feature if we're not going to archive the data anyway
        yield
        return

    log.info('Starting syslog monitoring...')

    archive_dir = misc.get_archive_dir(ctx)
    run.wait(
        ctx.cluster.run(
            args=[
                'mkdir',
                '-m0755',
                '--',
                '{adir}/syslog'.format(adir=archive_dir),
            ],
            wait=False,
        ))

    CONF = '/etc/rsyslog.d/80-cephtest.conf'
    conf_fp = StringIO('''
kern.* -{adir}/syslog/kern.log;RSYSLOG_FileFormat
*.*;kern.none -{adir}/syslog/misc.log;RSYSLOG_FileFormat
'''.format(adir=archive_dir))
    try:
        for rem in ctx.cluster.remotes.iterkeys():
            misc.sudo_write_file(
                remote=rem,
                path=CONF,
                data=conf_fp,
            )
            conf_fp.seek(0)
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'service',
                    # a mere reload (SIGHUP) doesn't seem to make
                    # rsyslog open the files
                    'rsyslog',
                    'restart',
                ],
                wait=False,
            ), )

        yield
    finally:
        log.info('Shutting down syslog monitoring...')

        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-f',
                    '--',
                    CONF,
                    run.Raw('&&'),
                    'sudo',
                    'service',
                    'rsyslog',
                    'restart',
                ],
                wait=False,
            ), )
        # race condition: nothing actually says rsyslog had time to
        # flush the file fully. oh well.

        log.info('Checking logs for errors...')
        for rem in ctx.cluster.remotes.iterkeys():
            log.debug('Checking %s', rem.name)
            r = rem.run(
                args=[
                    'egrep',
                    '--binary-files=text',
                    '\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b',
                    run.Raw('{adir}/syslog/*.log'.format(adir=archive_dir)),
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'task .* blocked for more than .* seconds',
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'lockdep is turned off',
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'trying to register non-static key',
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'DEBUG: fsize',  # xfs_fsr
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'CRON',  # ignore cron noise
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'BUG: bad unlock balance detected',  # #6097
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'inconsistent lock state',  # FIXME see #2523
                    run.Raw('|'),
                    'grep',
                    '-v',
                    '*** DEADLOCK ***',  # part of lockdep output
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'INFO: possible irq lock inversion dependency detected',  # FIXME see #2590 and #147
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'INFO: NMI handler (perf_event_nmi_handler) took too long to run',
                    run.Raw('|'),
                    'grep',
                    '-v',
                    'INFO: recovery required on readonly',
                    run.Raw('|'),
                    'head',
                    '-n',
                    '1',
                ],
                stdout=StringIO(),
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                log.error('Error in syslog on %s: %s', rem.name, stdout)
                set_status(ctx.summary, 'fail')
                if 'failure_reason' not in ctx.summary:
                    ctx.summary['failure_reason'] = \
                        "'{error}' in syslog".format(error=stdout)

        log.info('Compressing syslogs...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'find',
                    '{adir}/syslog'.format(adir=archive_dir),
                    '-name',
                    '*.log',
                    '-print0',
                    run.Raw('|'),
                    'sudo',
                    'xargs',
                    '-0',
                    '--no-run-if-empty',
                    '--',
                    'gzip',
                    '--',
                ],
                wait=False,
            ), )
Example #33
0
 def write_tempconf(self, confcontents):
     self.tempconfpath = self.mount_a.client_remote.mktemp(
         suffix='cephfs-shell.conf')
     sudo_write_file(self.mount_a.client_remote, self.tempconfpath,
                     confcontents)