Code example #1
File: hadoop.py Project: AlfredChenxf/teuthology
def validate_cluster(ctx):
    """
    Check that there is exactly one master and at least one slave configured
    """
    log.info('Validating Hadoop configuration')
    slaves = ctx.cluster.only(teuthology.is_type('hadoop.slave'))

    if (len(slaves.remotes) < 1):
        raise Exception("At least one hadoop.slave must be specified")
    else:
        log.info(str(len(slaves.remotes)) + " slaves specified")

    masters = ctx.cluster.only(teuthology.is_type('hadoop.master'))
    if (len(masters.remotes) == 1):
        pass
    else:
        raise Exception(
           "Exactly one hadoop.master must be specified. Currently there are "
           + str(len(masters.remotes)))

    try:
        yield

    finally:
        pass
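Every example on this page filters the cluster with ctx.cluster.only(teuthology.is_type(...)). The snippet below is not the teuthology implementation; it is a minimal, self-contained sketch (the Cluster class and is_type helper here are stand-ins that only mirror the API shape) showing how a role-prefix predicate combined with only() narrows remotes down to the matching hosts.

# Illustrative sketch only -- not copied from teuthology.
def is_type(type_):
    """Return a predicate matching roles such as 'hadoop.slave.0'."""
    prefix = type_ + '.'
    def matches(role):
        return role == type_ or role.startswith(prefix)
    return matches

class Cluster(object):
    def __init__(self, remotes):
        # remotes maps a remote (here just a hostname string) to its role list
        self.remotes = remotes

    def only(self, predicate):
        kept = {remote: roles for remote, roles in self.remotes.items()
                if any(predicate(role) for role in roles)}
        return Cluster(kept)

cluster = Cluster({
    'host1': ['hadoop.master.0', 'hadoop.slave.0'],
    'host2': ['hadoop.slave.1'],
    'host3': ['client.0'],
})
print(sorted(cluster.only(is_type('hadoop.slave')).remotes))  # ['host1', 'host2']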
Code example #2
File: blktrace.py Project: dzafman/teuthology
def execute(ctx, config):
    procs = []
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        '/tmp/cephtest/daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                        ],
                    wait=False,
                    stdin=run.PIPE,
                    )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processes')
        for proc in procs:
            proc.stdin.close()
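The function above also relies on names defined at module level in blktrace.py that are not part of this excerpt (log_dir, daemon_signal, blktrace). The values below are assumptions shown only to make the excerpt readable, not definitions taken from the project:

# Assumed module-level constants (illustration only, not from the excerpt):
blktrace = '/usr/sbin/blktrace'   # path to the blktrace binary on the remote
daemon_signal = 'term'            # signal name passed to daemon-helper
log_dir = '/tmp/cephtest/archive/performance/blktrace'  # where traces are written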
Code example #3
File: rebuild_mondb.py Project: Abhishekvrshny/ceph
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)
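teuthology.split_role() is used here to break a role string into its cluster name, daemon type, and id. Its implementation is not part of this excerpt; the sketch below only illustrates the behavior the call sites above rely on (falling back to the default cluster name when the role has no explicit prefix), and the default_cluster parameter is hypothetical.

def split_role(role, default_cluster='ceph'):
    # 'backup.mon.a' -> ('backup', 'mon', 'a'); 'mon.a' -> ('ceph', 'mon', 'a')
    parts = role.split('.')
    if len(parts) == 3:
        cluster, type_, id_ = parts
    else:
        cluster = default_cluster
        type_, id_ = parts
    return cluster, type_, id_

print(split_role('mon.a'))         # ('ceph', 'mon', 'a')
print(split_role('backup.osd.3'))  # ('backup', 'osd', '3')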
Code example #4
File: ceph.py Project: Abhishekvrshny/ceph-qa-suite
def healthy(ctx, config):
    """
    Wait for all OSDs to be up, and for the ceph health monitor to return HEALTH_OK.

    :param ctx: Context
    :param config: Configuration
    """
    config = config if isinstance(config, dict) else dict()
    cluster_name = config.get('cluster', 'ceph')
    log.info('Waiting until ceph cluster %s is healthy...', cluster_name)
    firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
    (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
    teuthology.wait_until_osds_up(
        ctx,
        cluster=ctx.cluster,
        remote=mon0_remote,
        ceph_cluster=cluster_name,
    )
    teuthology.wait_until_healthy(
        ctx,
        remote=mon0_remote,
        ceph_cluster=cluster_name,
    )

    if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes:
        # Some MDSs exist, wait for them to be healthy
        ceph_fs = Filesystem(ctx) # TODO: make Filesystem cluster-aware
        ceph_fs.wait_for_daemons(timeout=300)
Code example #5
File: hadoop.py Project: ErwanAliasr1/teuthology
def get_masters_data(ctx):
    tempdir = teuthology.get_testdir(ctx)
    path = "{tdir}/hadoop/etc/hadoop/masters".format(tdir=tempdir)
    nodes = ctx.cluster.only(teuthology.is_type('hadoop.master'))
    hosts = [s.ssh.get_transport().getpeername()[0] for s in nodes.remotes]
    data = '\n'.join(hosts)
    return path, data
Code example #6
File: hadoop.py Project: ErwanAliasr1/teuthology
def get_core_site_data(ctx, config):
    tempdir = teuthology.get_testdir(ctx)
    path = "{tdir}/hadoop/etc/hadoop/core-site.xml".format(tdir=tempdir)
    nodes = ctx.cluster.only(teuthology.is_type('hadoop.master'))
    host = [s.ssh.get_transport().getpeername()[0] for s in nodes.remotes][0]

    conf = {}
    if config.get('hdfs', False):
        conf.update({
            'fs.defaultFS': 'hdfs://{namenode}:9000',
            'hadoop.tmp.dir': '{tdir}/hadoop_tmp',
        })
    else:
        conf.update({
            'fs.default.name': 'ceph://{namenode}:6789/',
            'fs.defaultFS': 'ceph://{namenode}:6789/',
            'ceph.conf.file': '/etc/ceph/ceph.conf',
            'ceph.mon.address': '{namenode}:6789',
            'ceph.auth.id': 'admin',
            #'ceph.data.pools': 'cephfs_data',
            'fs.AbstractFileSystem.ceph.impl': 'org.apache.hadoop.fs.ceph.CephFs',
            'fs.ceph.impl': 'org.apache.hadoop.fs.ceph.CephFileSystem',
        })

    data_tmpl = dict_to_hadoop_conf(conf)
    return path, data_tmpl.format(tdir=tempdir, namenode=host)
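dict_to_hadoop_conf() is not included in this excerpt. A minimal sketch of what such a helper could look like, assuming it renders a flat dict as Hadoop's <configuration>/<property> XML and leaves the {namenode}/{tdir} placeholders for the caller's final .format() pass:

def dict_to_hadoop_conf(settings):
    # Render {'name': 'value', ...} as a Hadoop XML configuration document.
    properties = ''.join(
        '  <property>\n'
        '    <name>{0}</name>\n'
        '    <value>{1}</value>\n'
        '  </property>\n'.format(name, value)
        for name, value in sorted(settings.items())
    )
    return '<?xml version="1.0"?>\n<configuration>\n' + properties + '</configuration>\n'

# The caller then substitutes the placeholders left in the values:
xml = dict_to_hadoop_conf({'fs.defaultFS': 'hdfs://{namenode}:9000'})
print(xml.format(namenode='10.0.0.1'))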
Code example #7
File: rest-api.py Project: kri5/teuthology
def run_rest_api_daemon(ctx, api_clients):
    if not hasattr(ctx, 'daemons'):
        ctx.daemons = CephState()
    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for rems, roles in remotes.iteritems():
        for whole_id_ in roles:
            if whole_id_ in api_clients:
                id_ = whole_id_[len('clients'):]
                run_cmd = [
                    'sudo',
                    'daemon-helper',
                    'kill',
                    'ceph-rest-api',
                    '-n',
                    'client.rest{id}'.format(id=id_), ]
                cl_rest_id = 'client.rest{id}'.format(id=id_)
                ctx.daemons.add_daemon(rems, 'restapi',
                    cl_rest_id,
                    args=run_cmd,
                    logger=log.getChild(cl_rest_id),
                    stdin=run.PIPE,
                    wait=False,
                    )
    try:
        yield

    finally:
        """
        TO DO: destroy daemons started -- modify iter_daemons_of_role
        """
        teuthology.stop_daemons_of_type(ctx, 'restapi')
Code example #8
File: hadoop.py Project: athanatos/teuthology
def write_mapred_site(ctx):
    mapredSiteFile = "{tdir}/apache_hadoop/conf/mapred-site.xml".format(tdir=teuthology.get_testdir(ctx))

    master_ip = get_hadoop_master_ip(ctx)
    log.info("adding host {remote} as jobtracker".format(remote=master_ip))

    hadoopNodes = ctx.cluster.only(teuthology.is_type("hadoop"))
    for remote, roles_for_host in hadoopNodes.remotes.iteritems():
        teuthology.write_file(
            remote,
            mapredSiteFile,
            """<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>{remote}:54311</value>
    </property>
</configuration>  
""".format(
                remote=master_ip
            ),
        )

        log.info("wrote file: " + mapredSiteFile + " to host: " + str(remote))
Code example #9
File: ceph_client.py Project: AsherBond/teuthology
def create_keyring(ctx):
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
            client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name=client.{id}'.format(id=id_),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                    ],
                )
Code example #10
File: hadoop.py Project: AlfredChenxf/teuthology
def write_mapred_site(ctx):
    """
    Add required entries to conf/mapred-site.xml
    """
    mapred_site_file = "{tdir}/apache_hadoop/conf/mapred-site.xml".format(
            tdir=teuthology.get_testdir(ctx))

    master_ip = get_hadoop_master_ip(ctx)
    log.info('adding host {remote} as jobtracker'.format(remote=master_ip))

    hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
    for remote in hadoop_nodes.remotes:
        teuthology.write_file(remote, mapred_site_file,
'''<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>{remote}:54311</value>
    </property>
</configuration>
'''.format(remote=master_ip))

        log.info("wrote file: " + mapred_site_file + " to host: " + str(remote))
Code example #11
def create_keyring(ctx, cluster_name):
    """
    Set up key ring on remote sites
    """
    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.iteritems():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name)
            remote.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
                    '--name={name}'.format(name=name),
                    client_keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    client_keyring,
                    ],
                )
Code example #12
File: ceph.py Project: kawaguchi-s/ceph-qa-suite
def cephfs_setup(ctx, config):
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    # If there are any MDSs, then create a filesystem for them to use
    # Do this last because requires mon cluster to be up and running
    if mdss.remotes:
        log.info('Setting up CephFS filesystem...')

        ceph_fs = Filesystem(ctx)
        if not ceph_fs.legacy_configured():
            ceph_fs.create()

        is_active_mds = lambda role: role.startswith('mds.') and not role.endswith('-s') and role.find('-s-') == -1
        all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
        num_active = len([r for r in all_roles if is_active_mds(r)])
        mon_remote.run(args=[
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph',
            'mds', 'set_max_mds', str(num_active)])

    yield
Code example #13
File: hadoop.py Project: dzafman/teuthology
def write_core_site(ctx, config):
    coreSiteFile = "/tmp/cephtest/hadoop/conf/core-site.xml" 

    hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
    for remote, roles_for_host in hadoopNodes.remotes.iteritems():

        # check the config to see if we should use hdfs or ceph
        default_fs_string = ""
        if config.get('hdfs'):
            default_fs_string = 'hdfs://{master_ip}:54310'.format(master_ip=get_hadoop_master_ip(ctx))
        else:
            default_fs_string = 'ceph:///'

        teuthology.write_file(remote, coreSiteFile, 
'''<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file.  -->
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/tmp/hadoop/tmp</value>
    </property>
    <property>
        <name>fs.default.name</name>
        <value>{default_fs}</value>
    </property>
    <property>
        <name>ceph.conf.file</name>
        <value>/tmp/cephtest/ceph.conf</value>
    </property>
</configuration>
'''.format(default_fs=default_fs_string))

        log.info("wrote file: " + coreSiteFile + " to host: " + str(remote))
Code example #14
File: rebuild_mondb.py Project: Abhishekvrshny/ceph
def _revive_mons(manager, mons, recovered, keyring_path):
    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    is_mon = teuthology.is_type('mon')
    for remote, roles in mons.remotes.iteritems():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if recovered != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            log.info('reviving mon.{0}'.format(m))
            manager.revive_mon(m)
            n_mons += 1
    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
Code example #15
File: workunit.py Project: beess/ceph
def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    See run_tests() for parameter documentation.
    """
    is_client = misc.is_type('client')
    client_remotes = {}
    created_mountpoint = {}
    for remote, roles_for_host in ctx.cluster.remotes.items():
        for role in roles_for_host:
            if is_client(role):
                client_remotes[role] = remote
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role, remote in client_remotes.items():
                p.spawn(_run_tests, ctx, refspec, role, [unit], env, subdir,
                        timeout=timeout)

    # cleanup the generated client directories
    for role, _ in client_remotes.items():
        _delete_dir(ctx, role, created_mountpoint[role])
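The parallel() helper used above appears to come from teuthology's parallel module: each p.spawn() schedules a worker, and leaving the with block waits for all of them, re-raising failures. The stand-in below is only an illustration of that call shape using the standard library, not teuthology's implementation.

from concurrent.futures import ThreadPoolExecutor

class parallel(object):
    # Thread-based stand-in that mimics the spawn / wait-on-exit pattern.
    def __init__(self):
        self._pool = ThreadPoolExecutor(max_workers=8)
        self._futures = []

    def __enter__(self):
        return self

    def spawn(self, fn, *args, **kwargs):
        self._futures.append(self._pool.submit(fn, *args, **kwargs))

    def __exit__(self, *exc_info):
        try:
            for future in self._futures:
                future.result()   # propagate the first worker exception
        finally:
            self._pool.shutdown()
        return False

def _run_tests(role, unit):
    print('running %s on %s' % (unit, role))

with parallel() as p:
    for role in ['client.0', 'client.1']:
        p.spawn(_run_tests, role, 'suites/blogbench.sh')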
Code example #16
File: hadoop.py Project: ErwanAliasr1/teuthology
def configure(ctx, config, hadoops):
    tempdir = teuthology.get_testdir(ctx)

    log.info("Writing Hadoop slaves file...")
    for remote in hadoops.remotes:
        path, data = get_slaves_data(ctx)
        teuthology.write_file(remote, path, StringIO(data))

    log.info("Writing Hadoop masters file...")
    for remote in hadoops.remotes:
        path, data = get_masters_data(ctx)
        teuthology.write_file(remote, path, StringIO(data))

    log.info("Writing Hadoop core-site.xml file...")
    for remote in hadoops.remotes:
        path, data = get_core_site_data(ctx, config)
        teuthology.write_file(remote, path, StringIO(data))

    log.info("Writing Hadoop yarn-site.xml file...")
    for remote in hadoops.remotes:
        path, data = get_yarn_site_data(ctx)
        teuthology.write_file(remote, path, StringIO(data))

    log.info("Writing Hadoop hdfs-site.xml file...")
    for remote in hadoops.remotes:
        path, data = get_hdfs_site_data(ctx)
        teuthology.write_file(remote, path, StringIO(data))

    log.info("Writing Hadoop mapred-site.xml file...")
    for remote in hadoops.remotes:
        path, data = get_mapred_site_data(ctx)
        teuthology.write_file(remote, path, StringIO(data))

    log.info("Setting JAVA_HOME in hadoop-env.sh...")
    for remote in hadoops.remotes:
        path = "{tdir}/hadoop/etc/hadoop/hadoop-env.sh".format(tdir=tempdir)
        if remote.os.package_type == 'rpm':
            data = "JAVA_HOME=/usr/lib/jvm/java\n"
        elif remote.os.package_type == 'deb':
            data = "JAVA_HOME=/usr/lib/jvm/default-java\n"
        else:
            raise UnsupportedPackageTypeError(remote)
        teuthology.prepend_lines_to_file(remote, path, data)

    if config.get('hdfs', False):
        log.info("Formatting HDFS...")
        testdir = teuthology.get_testdir(ctx)
        hadoop_dir = "{tdir}/hadoop/".format(tdir=testdir)
        masters = ctx.cluster.only(teuthology.is_type('hadoop.master'))
        assert len(masters.remotes) == 1
        master = masters.remotes.keys()[0]
        master.run(
            args = [
                hadoop_dir + "bin/hadoop",
                "namenode",
                "-format"
            ],
            wait = True,
            )
Code example #17
File: hadoop.py Project: dzafman/teuthology
def write_slaves(ctx):
    log.info('Setting up slave nodes...')

    slavesFile = "/tmp/cephtest/hadoop/conf/slaves"
    tmpFile = StringIO()

    slaves = ctx.cluster.only(teuthology.is_type('hadoop.slave'))
    for remote, roles_for_host in slaves.remotes.iteritems():
        tmpFile.write('{remote}\n'.format(remote=remote.ssh.get_transport().getpeername()[0]))

    tmpFile.seek(0)

    hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
    for remote, roles_for_host in hadoopNodes.remotes.iteritems():
        teuthology.write_file(remote=remote, path=slavesFile, data=tmpFile)
        tmpFile.seek(0)
        log.info("wrote file: " + slavesFile + " to host: " + str(remote))
Code example #18
File: hadoop.py Project: athanatos/teuthology
def write_slaves(ctx):
    log.info("Setting up slave nodes...")

    slavesFile = "{tdir}/apache_hadoop/conf/slaves".format(tdir=teuthology.get_testdir(ctx))
    tmpFile = StringIO()

    slaves = ctx.cluster.only(teuthology.is_type("hadoop.slave"))
    for remote, roles_for_host in slaves.remotes.iteritems():
        tmpFile.write("{remote}\n".format(remote=remote.ssh.get_transport().getpeername()[0]))

    tmpFile.seek(0)

    hadoopNodes = ctx.cluster.only(teuthology.is_type("hadoop"))
    for remote, roles_for_host in hadoopNodes.remotes.iteritems():
        teuthology.write_file(remote=remote, path=slavesFile, data=tmpFile)
        tmpFile.seek(0)
        log.info("wrote file: " + slavesFile + " to host: " + str(remote))
Code example #19
File: blktrace.py Project: dzafman/teuthology
def setup(ctx, config):
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        log.info('Creating %s on %s' % (log_dir,remote.name))
        remote.run(
            args=['mkdir', '-p', '-m0755', '--', log_dir],
            wait=False,
            )
    yield
Code example #20
File: hadoop.py Project: athanatos/teuthology
def binaries(ctx, config):
    path = config.get("path")

    if path is None:
        # fetch Apache Hadoop from gitbuilder
        log.info("Fetching and unpacking Apache Hadoop binaries from gitbuilder...")
        apache_sha1, apache_hadoop_bindir_url = teuthology.get_ceph_binary_url(
            package="apache-hadoop",
            branch=config.get("apache_branch"),
            tag=config.get("tag"),
            sha1=config.get("sha1"),
            flavor=config.get("flavor"),
            format=config.get("format"),
            dist=config.get("dist"),
            arch=config.get("arch"),
        )
        log.info("apache_hadoop_bindir_url %s" % (apache_hadoop_bindir_url))
        ctx.summary["apache-hadoop-sha1"] = apache_sha1

        # fetch Inktank Hadoop from gitbuilder
        log.info("Fetching and unpacking Inktank Hadoop binaries from gitbuilder...")
        inktank_sha1, inktank_hadoop_bindir_url = teuthology.get_ceph_binary_url(
            package="hadoop",
            branch=config.get("inktank_branch"),
            tag=config.get("tag"),
            sha1=config.get("sha1"),
            flavor=config.get("flavor"),
            format=config.get("format"),
            dist=config.get("dist"),
            arch=config.get("arch"),
        )
        log.info("inktank_hadoop_bindir_url %s" % (inktank_hadoop_bindir_url))
        ctx.summary["inktank-hadoop-sha1"] = inktank_sha1

    else:
        raise Exception("The hadoop task does not support the path argument at present")

    with parallel() as p:
        hadoopNodes = ctx.cluster.only(teuthology.is_type("hadoop"))
        # these can happen independently
        for remote in hadoopNodes.remotes.iterkeys():
            p.spawn(_node_binaries, ctx, config, remote, inktank_hadoop_bindir_url, apache_hadoop_bindir_url)

    try:
        yield
    finally:
        log.info("Removing hadoop binaries...")
        run.wait(
            ctx.cluster.run(
                args=["rm", "-rf", "--", "{tdir}/apache_hadoop".format(tdir=teuthology.get_testdir(ctx))], wait=False
            )
        )
        run.wait(
            ctx.cluster.run(
                args=["rm", "-rf", "--", "{tdir}/inktank_hadoop".format(tdir=teuthology.get_testdir(ctx))], wait=False
            )
        )
Code example #21
File: hadoop.py Project: AlfredChenxf/teuthology
def _get_master(ctx):
    """
    Return the hadoop master.  If more than one is found, fail an assertion
    """
    master = ctx.cluster.only(teuthology.is_type('hadoop.master'))
    assert 1 == len(master.remotes.items()), \
            'There must be exactly 1 hadoop.master configured'

    return master.remotes.items()[0]
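Note that master.remotes.items()[0] assumes Python 2, where dict.items() returns a list; under Python 3 it returns a view that cannot be indexed. A hedged sketch of the same helper written for Python 3, assuming the same ctx and teuthology imports as the excerpt above:

def _get_master(ctx):
    """Return the (remote, roles) pair for the single hadoop.master."""
    master = ctx.cluster.only(teuthology.is_type('hadoop.master'))
    remotes = list(master.remotes.items())   # materialize the dict view
    assert len(remotes) == 1, \
        'There must be exactly 1 hadoop.master configured'
    return remotes[0]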
Code example #22
File: rebuild_mondb.py Project: Abhishekvrshny/ceph
def _revive_osds(ctx, manager):
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    for _, roles in osds.remotes.iteritems():
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
Code example #23
File: iscsi.py Project: hughsaunders/teuthology
def tgt_devname_get(ctx, test_image):
    """
    Get the name of the newly created device by following the by-path
    link (which is symbolically linked to the appropriate /dev/sd* file).
    """
    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
    rem_name = _get_remote_name(remotes, test_image)
    lnkpath = '/dev/disk/by-path/ip-%s:3260-iscsi-rbd-lun-1' % \
            socket.gethostbyname(rem_name)
    return lnkpath
Code example #24
File: hadoop.py Project: dzafman/teuthology
def write_master(ctx):
    mastersFile = "/tmp/cephtest/hadoop/conf/masters"
    master = _get_master(ctx)
    remote, _ = master


    hadoopNodes = ctx.cluster.only(teuthology.is_type('hadoop'))
    for remote, roles_for_host in hadoopNodes.remotes.iteritems():
        teuthology.write_file(remote, mastersFile, '{remote}\n'.format(remote=remote.ssh.get_transport().getpeername()[0]))
        log.info("wrote file: " + mastersFile + " to host: " + str(remote))
Code example #25
File: rebuild_mondb.py Project: Abhishekvrshny/ceph
def _revive_mgrs(ctx, manager):
    is_mgr = teuthology.is_type('mgr')
    mgrs = ctx.cluster.only(is_mgr)
    for _, roles in mgrs.remotes.iteritems():
        for role in roles:
            if not is_mgr(role):
                continue
            _, _, mgr_id = teuthology.split_role(role)
            log.info('reviving mgr.{0}'.format(mgr_id))
            manager.revive_mgr(mgr_id)
Code example #26
File: tgt.py Project: Abhishekvrshny/ceph-qa-suite
def task(ctx, config):
    """
    Start up tgt.

    To start on all clients::

        tasks:
        - ceph:
        - tgt:

    To start on certain clients::

        tasks:
        - ceph:
        - tgt: [client.0, client.3]

    or

        tasks:
        - ceph:
        - tgt:
            client.0:
            client.3:

    An image size can also be specified::
        
        tasks:
        - ceph:
        - tgt:
            image_size: 20480

    The general flow of things here is:
        1. Find clients on which tgt is supposed to run (start_tgtd)
        2. Remotely start up tgt daemon
    On cleanup:
        3. Stop tgt daemon

    The iscsi administration is handled by the iscsi task.
    """
    if config:
        config = {key : val for key, val in config.items()
                if key.startswith('client')}
    # config at this point should only contain keys starting with 'client'
    start_tgtd = []
    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
    log.info(remotes)
    if not config:
        start_tgtd = ['client.{id}'.format(id=id_)
            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
    else:
        start_tgtd = config
    log.info(start_tgtd)
    with contextutil.nested(
            lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),):
        yield
Code example #27
File: ceph.py Project: hgichon/anycloud-test
def create_ceph_conf(ctx, config):

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs),))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs
                )
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run( args=[ 'sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt' ] )
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run( args=[ 'truncate', '-s', '1500M', tmpfs ] )
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs),))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [host for (host, port) in (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    log.info(ctx)
    
    yield
Code example #28
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                     config['cluster']):
            if roles_to_devs.get(role):
                dev = roles_to_devs[role]
                log.info("running blktrace on %s: %s" % (remote.name, dev))

                proc = remote.run(
                    args=[
                        'cd',
                        log_dir,
                        run.Raw(';'),
                        'daemon-helper',
                        daemon_signal,
                        'sudo',
                        blktrace,
                        '-o',
                        dev.rsplit("/", 1)[1],
                        '-d',
                        dev,
                        ],
                    wait=False,
                    stdin=run.PIPE,
                    )
                procs.append(proc)
    try:
        yield
    finally:
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        log.info('stopping blktrace processes')
        for proc in procs:
            proc.stdin.close()
Code example #29
File: hadoop.py Project: athanatos/teuthology
def validate_config(ctx, config):
    log.info("Vaidating Hadoop configuration")
    slaves = ctx.cluster.only(teuthology.is_type("hadoop.slave"))

    if len(slaves.remotes) < 1:
        raise Exception("At least one hadoop.slave must be specified")
    else:
        log.info(str(len(slaves.remotes)) + " slaves specified")

    masters = ctx.cluster.only(teuthology.is_type("hadoop.master"))
    if len(masters.remotes) == 1:
        pass
    else:
        raise Exception("Exactly one hadoop.master must be specified. Currently there are " + str(len(masters.remotes)))

    try:
        yield

    finally:
        pass
Code example #30
File: blktrace.py Project: AsherBond/teuthology
def setup(ctx, config):
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx))

    for remote, roles_for_host in osds.remotes.iteritems():
        log.info('Creating %s on %s' % (log_dir,remote.name))
        remote.run(
            args=['mkdir', '-p', '-m0755', '--', log_dir],
            wait=False,
            )
    yield
Code example #31
def task(ctx, config):

    log.info('starting nfs_ganesha_rgw tests')
    # RGW and NFS should be on the same machine

    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task set-repo only supports a dictionary for configuration"

    test_name = config['test-name'] + ".yaml"
    script_name = tests_mapper.get(config['test-name'], None) + ".py"
    nfs_version = config['nfs-version']
    mount_dir = config['mount-dir']
    branch = config.get("branch", "master")

    log.info('got test_name: %s' % test_name)
    log.info('got nfs version: %s' % nfs_version)
    log.info('got mount dir: %s' % mount_dir)

    remotes = ctx.cluster.only(teuthology.is_type('mon'))
    mon = [remote for remote, roles_for_host in remotes.remotes.items()]

    rgw_remote = ctx.cluster.only(teuthology.is_type('rgw'))
    rgw = [remote for remote, roles_for_host in rgw_remote.remotes.items()]

    # installing nfs-ganesha-selinux package
    if rgw[0].os.version.startswith('7'):
        rgw[0].run(
            args=['sudo', 'yum', 'install', '-y', 'nfs-ganesha-selinux'])

    # clone the repo

    rgw[0].run(args=['sudo', 'rm', '-rf', 'nfs_ganesha_rgw'],
               check_status=False)
    rgw[0].run(args=['sudo', 'rm', '-rf',
                     run.Raw('/tmp/nfs-ganesh-rgw_log*')],
               check_status=False)
    rgw[0].run(args=['mkdir', '-p', 'nfs_ganesha_rgw'])

    # stop native nfs_ganesha service.

    rgw[0].run(args=['sudo', 'systemctl', 'stop', 'nfs-server.service'
                     ])  # systemctl stop nfs-server.service
    rgw[0].run(args=['sudo', 'systemctl', 'disable', 'nfs-server.service'
                     ])  # systemctl disable nfs-server.service

    out = io.StringIO()
    mon[0].run(args=['sudo', 'cat', '/etc/ceph/ceph.client.admin.keyring'],
               stdout=out)
    v_as_out = out.read()
    teuthology.create_file(rgw[0],
                           '/etc/ceph/ceph.client.admin.keyring',
                           data=v_as_out,
                           sudo=True)

    # parsing nfs_ganesha conf file

    out = io.StringIO()
    rgw[0].run(args=['sudo', 'cat', '/etc/ganesha/ganesha.conf'], stdout=out)
    v_as_out = out.readlines()

    clean = lambda x: re.sub('[^A-Za-z0-9]+', '', x)

    for content in v_as_out:

        if 'Access_Key_Id' in content:
            access_key = clean(content.split('=')[1])

        if 'Secret_Access_Key' in content:
            secret_key = clean(content.split('=')[1])

        if 'User_Id' in content:
            rgw_user_id = clean(content.split('=')[1])

        if 'Pseudo' in content:
            pseudo = content.split('=')[1].strip(' ').strip('\n').strip(
                ' ').strip(';').strip('/')

    rgw[0].run(args=['sudo', 'setenforce', '1'])

    log.info('restarting nfs-ganesha service')

    rgw[0].run(args=['sudo', 'systemctl', 'restart', 'nfs-ganesha.service'])

    time.sleep(60)

    rgw[0].run(args=[
        'cd', 'nfs_ganesha_rgw',
        run.Raw(';'), 'git', 'clone',
        'https://github.com/red-hat-storage/ceph-qe-scripts.git'
    ])

    rgw[0].run(args=[
        'cd', 'nfs_ganesha_rgw/ceph-qe-scripts',
        run.Raw(';'), 'git', 'checkout',
        '%s' % branch
    ])

    rgw[0].run(args=['python3', '-m', 'venv', 'venv'])

    rgw[0].run(args=[
        'source', 'venv/bin/activate',
        run.Raw(';'),
        run.Raw('pip3 install --upgrade setuptools'),
        run.Raw(';'), 'deactivate'
    ])

    rgw[0].run(args=[
        'source', 'venv/bin/activate',
        run.Raw(';'),
        run.Raw(
            'pip3 install boto boto3 names PyYaml psutil ConfigParser python-swiftclient '
            'swiftly simplejson rgwadmin'),
        run.Raw(';'), 'deactivate'
    ])

    # copy rgw user details (yaml format) to nfs node or rgw node

    rgw_user_config = dict(user_id=rgw_user_id,
                           access_key=access_key,
                           secret_key=secret_key,
                           rgw_hostname=rgw[0].shortname,
                           ganesha_config_exists=True,
                           already_mounted=False,
                           nfs_version=nfs_version,
                           nfs_mnt_point=mount_dir,
                           Pseudo=pseudo)

    rgw_user_config_fname = 'rgw_user.yaml'

    temp_yaml_file = rgw_user_config_fname + "_" + str(
        os.getpid()) + pwd.getpwuid(os.getuid()).pw_name

    log.info('creating rgw_user_config_fname: %s' % rgw_user_config)
    local_file = '/tmp/' + temp_yaml_file
    with open(local_file, 'w') as outfile:
        outfile.write(yaml.dump(rgw_user_config, default_flow_style=False))

    log.info('copying rgw_user_config_fname to the client node')
    destination_location = 'nfs_ganesha_rgw/ceph-qe-scripts/rgw/v2/tests/nfs_ganesha/config/' + rgw_user_config_fname
    rgw[0].put_file(local_file, destination_location)

    rgw[0].run(args=[run.Raw('sudo rm -rf %s' % local_file)],
               check_status=False)

    # run the test

    rgw[0].run(args=[
        'source', 'venv/bin/activate',
        run.Raw(';'),
        run.Raw(
            'python3 nfs_ganesha_rgw/ceph-qe-scripts/rgw/v2/tests/nfs_ganesha/%s '
            '-r nfs_ganesha_rgw/ceph-qe-scripts/rgw/v2/tests/nfs_ganesha/config/rgw_user.yaml '
            '-c nfs_ganesha_rgw/ceph-qe-scripts/rgw/v2/tests/nfs_ganesha/config/%s '
            % (script_name, test_name)),
        run.Raw(';'), 'deactivate'
    ])

    try:
        yield
    finally:
        log.info("Deleting the test soot")

        rgw[0].run(args=['sudo', 'umount', run.Raw('%s' % mount_dir)])

        cleanup = lambda x: rgw[0].run(args=[run.Raw('sudo rm -rf %s' % x)])

        soot = [
            'venv', 'rgw-tests', 'test_data',
            '*.json', 'Download.*', 'Download', '*.mpFile', 'x*', 'key.*',
            'Mp.*', '*.key.*'
        ]

        list(map(cleanup, soot))
Code example #32
File: tgt.py Project: zwj262310/ceph
def start_tgt_remotes(ctx, start_tgtd):
    """
    This subtask starts up a tgtd on the clients specified
    """
    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
    tgtd_list = []
    for rem, roles in remotes.items():
        for _id in roles:
            if _id in start_tgtd:
                if not rem in tgtd_list:
                    tgtd_list.append(rem)
                    size = ctx.config.get('image_size', 10240)
                    rem.run(args=[
                        'rbd',
                        'create',
                        'iscsi-image',
                        '--size',
                        str(size),
                    ])
                    rem.run(args=[
                        'sudo',
                        'tgtadm',
                        '--lld',
                        'iscsi',
                        '--mode',
                        'target',
                        '--op',
                        'new',
                        '--tid',
                        '1',
                        '--targetname',
                        'rbd',
                    ])
                    rem.run(args=[
                        'sudo',
                        'tgtadm',
                        '--lld',
                        'iscsi',
                        '--mode',
                        'logicalunit',
                        '--op',
                        'new',
                        '--tid',
                        '1',
                        '--lun',
                        '1',
                        '--backing-store',
                        'iscsi-image',
                        '--bstype',
                        'rbd',
                    ])
                    rem.run(args=[
                        'sudo',
                        'tgtadm',
                        '--lld',
                        'iscsi',
                        '--op',
                        'bind',
                        '--mode',
                        'target',
                        '--tid',
                        '1',
                        '-I',
                        'ALL',
                    ])
    try:
        yield

    finally:
        for rem in tgtd_list:
            rem.run(args=[
                'sudo',
                'tgtadm',
                '--lld',
                'iscsi',
                '--mode',
                'target',
                '--op',
                'delete',
                '--force',
                '--tid',
                '1',
            ])
            rem.run(args=[
                'rbd',
                'snap',
                'purge',
                'iscsi-image',
            ])
            rem.run(args=[
                'sudo',
                'rbd',
                'rm',
                'iscsi-image',
            ])
Code example #33
def cluster(ctx, config):
    """
    Handle the creation and removal of a ceph cluster.

    On startup:
        Create directories needed for the cluster.
        Create remote journals for all osds.
        Create and set keyring.
        Copy the monmap to the test systems.
        Setup mon nodes.
        Setup mds nodes.
        Mkfs osd nodes.
        Add keyring information to monmaps
        Mkfs mon nodes.

    On exit:
        If errors occurred, extract a failure message and store it in ctx.summary.
        Unmount all test files and temporary journaling files.
        Save the monitor information and archive all ceph logs.
        Cleanup the keyring setup, and remove all monitor map and data files left over.

    :param ctx: Context
    :param config: Configuration
    """
    if ctx.config.get('use_existing_cluster', False) is True:
        log.info("'use_existing_cluster' is true; skipping cluster creation")
        yield

    testdir = teuthology.get_testdir(ctx)
    log.info('Creating ceph cluster...')
    run.wait(
        ctx.cluster.run(
            args=[
                'install',
                '-d',
                '-m0755',
                '--',
                '{tdir}/data'.format(tdir=testdir),
            ],
            wait=False,
        ))

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'install',
                '-d',
                '-m0777',
                '--',
                '/var/run/ceph',
            ],
            wait=False,
        ))

    devs_to_clean = {}
    remote_to_roles_to_devs = {}
    remote_to_roles_to_journals = {}
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    for remote, roles_for_host in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        roles_to_devs = {}
        roles_to_journals = {}
        if config.get('fs'):
            log.info('fs option selected, checking for scratch devs')
            log.info('found devs: %s' % (str(devs), ))
            devs_id_map = teuthology.get_wwn_id_map(remote, devs)
            iddevs = devs_id_map.values()
            roles_to_devs = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs)
            if len(roles_to_devs) < len(iddevs):
                iddevs = iddevs[len(roles_to_devs):]
            devs_to_clean[remote] = []

        if config.get('block_journal'):
            log.info('block journal enabled')
            roles_to_journals = assign_devs(
                teuthology.roles_of_type(roles_for_host, 'osd'), iddevs)
            log.info('journal map: %s', roles_to_journals)

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled')
            roles_to_journals = {}
            remote.run(args=['sudo', 'mount', '-t', 'tmpfs', 'tmpfs', '/mnt'])
            for osd in teuthology.roles_of_type(roles_for_host, 'osd'):
                tmpfs = '/mnt/osd.%s' % osd
                roles_to_journals[osd] = tmpfs
                remote.run(args=['truncate', '-s', '1500M', tmpfs])
            log.info('journal map: %s', roles_to_journals)

        log.info('dev map: %s' % (str(roles_to_devs), ))
        remote_to_roles_to_devs[remote] = roles_to_devs
        remote_to_roles_to_journals[remote] = roles_to_journals

    log.info('Generating config...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername()
                                  for (remote, role_list) in remotes_and_roles)
    ]
    conf = teuthology.skeleton_config(ctx, roles=roles, ips=ips)
    for remote, roles_to_journals in remote_to_roles_to_journals.iteritems():
        for role, journal in roles_to_journals.iteritems():
            key = "osd." + str(role)
            if key not in conf:
                conf[key] = {}
            conf[key]['osd journal'] = journal
    for section, keys in config['conf'].iteritems():
        for key, value in keys.iteritems():
            log.info("[%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    if config.get('tmpfs_journal'):
        conf['journal dio'] = False

    ctx.ceph = argparse.Namespace()
    ctx.ceph.conf = conf

    keyring_path = config.get('keyring_path', '/etc/ceph/ceph.keyring')

    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    firstmon = teuthology.get_first_mon(ctx, config)

    log.info('Setting up %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--create-keyring',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=mon.',
        keyring_path,
    ], )
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'chmod',
        '0644',
        keyring_path,
    ], )
    (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()
    fsid = teuthology.create_simple_monmap(
        ctx,
        remote=mon0_remote,
        conf=conf,
    )
    if not 'global' in conf:
        conf['global'] = {}
    conf['global']['fsid'] = fsid

    conf_path = config.get('conf_path', DEFAULT_CONF_PATH)
    log.info('Writing %s for FSID %s...' % (conf_path, fsid))
    write_conf(ctx, conf_path)

    log.info('Creating admin key on %s...' % firstmon)
    ctx.cluster.only(firstmon).run(args=[
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        coverage_dir,
        'ceph-authtool',
        '--gen-key',
        '--name=client.admin',
        '--set-uid=0',
        '--cap',
        'mon',
        'allow *',
        '--cap',
        'osd',
        'allow *',
        '--cap',
        'mds',
        'allow *',
        keyring_path,
    ], )

    log.info('Copying monmap to all nodes...')
    keyring = teuthology.get_file(
        remote=mon0_remote,
        path=keyring_path,
    )
    monmap = teuthology.get_file(
        remote=mon0_remote,
        path='{tdir}/monmap'.format(tdir=testdir),
    )

    for rem in ctx.cluster.remotes.iterkeys():
        # copy mon key and initial monmap
        log.info('Sending monmap to node {remote}'.format(remote=rem))
        teuthology.sudo_write_file(remote=rem,
                                   path=keyring_path,
                                   data=keyring,
                                   perms='0644')
        teuthology.write_file(
            remote=rem,
            path='{tdir}/monmap'.format(tdir=testdir),
            data=monmap,
        )

    log.info('Setting up mon nodes...')
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    run.wait(
        mons.run(
            args=[
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'osdmaptool',
                '-c',
                conf_path,
                '--clobber',
                '--createsimple',
                '{num:d}'.format(num=teuthology.num_instances_of_type(
                    ctx.cluster, 'osd'), ),
                '{tdir}/osdmap'.format(tdir=testdir),
                '--pg_bits',
                '2',
                '--pgp_bits',
                '4',
            ],
            wait=False,
        ), )

    log.info('Setting up mds nodes...')
    mdss = ctx.cluster.only(teuthology.is_type('mds'))
    for remote, roles_for_host in mdss.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mds'):
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/mds/ceph-{id}'.format(id=id_),
                run.Raw('&&'),
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-authtool',
                '--create-keyring',
                '--gen-key',
                '--name=mds.{id}'.format(id=id_),
                '/var/lib/ceph/mds/ceph-{id}/keyring'.format(id=id_),
            ], )

    cclient.create_keyring(ctx)
    log.info('Running mkfs on osd nodes...')

    ctx.disk_config = argparse.Namespace()
    ctx.disk_config.remote_to_roles_to_dev = remote_to_roles_to_devs
    ctx.disk_config.remote_to_roles_to_journals = remote_to_roles_to_journals
    ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
    ctx.disk_config.remote_to_roles_to_dev_fstype = {}

    log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(
        r=str(ctx.disk_config.remote_to_roles_to_dev)))
    for remote, roles_for_host in osds.remotes.iteritems():
        roles_to_devs = remote_to_roles_to_devs[remote]
        roles_to_journals = remote_to_roles_to_journals[remote]

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/osd/ceph-{id}'.format(id=id_),
            ])
            log.info(str(roles_to_journals))
            log.info(id_)
            if roles_to_devs.get(id_):
                dev = roles_to_devs[id_]
                fs = config.get('fs')
                package = None
                mkfs_options = config.get('mkfs_options')
                mount_options = config.get('mount_options')
                if fs == 'btrfs':
                    # package = 'btrfs-tools'
                    if mount_options is None:
                        mount_options = ['noatime', 'user_subvol_rm_allowed']
                    if mkfs_options is None:
                        mkfs_options = [
                            '-m', 'single', '-l', '32768', '-n', '32768'
                        ]
                if fs == 'xfs':
                    # package = 'xfsprogs'
                    if mount_options is None:
                        mount_options = ['noatime']
                    if mkfs_options is None:
                        mkfs_options = ['-f', '-i', 'size=2048']
                if fs == 'ext4' or fs == 'ext3':
                    if mount_options is None:
                        mount_options = ['noatime', 'user_xattr']

                if mount_options is None:
                    mount_options = []
                if mkfs_options is None:
                    mkfs_options = []
                mkfs = ['mkfs.%s' % fs] + mkfs_options
                log.info('%s on %s on %s' % (mkfs, dev, remote))
                if package is not None:
                    remote.run(
                        args=['sudo', 'apt-get', 'install', '-y', package],
                        stdout=StringIO(),
                    )

                try:
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])
                except run.CommandFailedError:
                    # Newer btrfs-tools doesn't prompt for overwrite, use -f
                    if '-f' not in mkfs_options:
                        mkfs_options.append('-f')
                        mkfs = ['mkfs.%s' % fs] + mkfs_options
                        log.info('%s on %s on %s' % (mkfs, dev, remote))
                    remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs +
                               [dev])

                log.info('mount %s on %s -o %s' %
                         (dev, remote, ','.join(mount_options)))
                remote.run(args=[
                    'sudo',
                    'mount',
                    '-t',
                    fs,
                    '-o',
                    ','.join(mount_options),
                    dev,
                    os.path.join('/var/lib/ceph/osd', 'ceph-{id}'.format(
                        id=id_)),
                ])
                if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
                    ctx.disk_config.remote_to_roles_to_dev_mount_options[
                        remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][
                    id_] = mount_options
                if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
                    ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
                ctx.disk_config.remote_to_roles_to_dev_fstype[remote][id_] = fs
                devs_to_clean[remote].append(
                    os.path.join(
                        os.path.join('/var/lib/ceph/osd',
                                     'ceph-{id}'.format(id=id_)), ))

        for id_ in teuthology.roles_of_type(roles_for_host, 'osd'):
            remote.run(args=[
                'sudo',
                'MALLOC_CHECK_=3',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-osd',
                '--mkfs',
                '--mkkey',
                '-i',
                id_,
                '--monmap',
                '{tdir}/monmap'.format(tdir=testdir),
            ], )

    log.info('Reading keys from all nodes...')
    keys_fp = StringIO()
    keys = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['mds', 'osd']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/var/lib/ceph/{type}/ceph-{id}/keyring'.format(
                        type=type_,
                        id=id_,
                    ),
                    sudo=True,
                )
                keys.append((type_, id_, data))
                keys_fp.write(data)
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        for type_ in ['client']:
            for id_ in teuthology.roles_of_type(roles_for_host, type_):
                data = teuthology.get_file(
                    remote=remote,
                    path='/etc/ceph/ceph.client.{id}.keyring'.format(id=id_))
                keys.append((type_, id_, data))
                keys_fp.write(data)

    log.info('Adding keys to all mons...')
    writes = mons.run(
        args=[
            'sudo',
            'tee',
            '-a',
            keyring_path,
        ],
        stdin=run.PIPE,
        wait=False,
        stdout=StringIO(),
    )
    keys_fp.seek(0)
    teuthology.feed_many_stdins_and_close(keys_fp, writes)
    run.wait(writes)
    for type_, id_, data in keys:
        run.wait(
            mons.run(
                args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    keyring_path,
                    '--name={type}.{id}'.format(
                        type=type_,
                        id=id_,
                    ),
                ] + list(teuthology.generate_caps(type_)),
                wait=False,
            ), )

    log.info('Running mkfs on mon nodes...')
    for remote, roles_for_host in mons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, 'mon'):
            remote.run(args=[
                'sudo',
                'mkdir',
                '-p',
                '/var/lib/ceph/mon/ceph-{id}'.format(id=id_),
            ], )
            remote.run(args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'ceph-mon',
                '--mkfs',
                '-i',
                id_,
                '--monmap={tdir}/monmap'.format(tdir=testdir),
                '--osdmap={tdir}/osdmap'.format(tdir=testdir),
                '--keyring={kpath}'.format(kpath=keyring_path),
            ], )

    run.wait(
        mons.run(
            args=[
                'rm',
                '--',
                '{tdir}/monmap'.format(tdir=testdir),
                '{tdir}/osdmap'.format(tdir=testdir),
            ],
            wait=False,
        ), )

    try:
        yield
    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise
    finally:
        (mon0_remote, ) = ctx.cluster.only(firstmon).remotes.keys()

        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log.
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep',
                pattern,
                '/var/log/ceph/ceph.log',
            ]
            for exclude in excludes:
                args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'),
                'head',
                '-n',
                '1',
            ])
            r = mon0_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config['log_whitelist']) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log_whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        for remote, dirs in devs_to_clean.iteritems():
            for dir_ in dirs:
                log.info('Unmounting %s on %s' % (dir_, remote))
                try:
                    remote.run(args=[
                        'sync',
                        run.Raw('&&'), 'sudo', 'umount', '-f', dir_
                    ])
                except Exception as e:
                    remote.run(args=[
                        'sudo',
                        run.Raw('PATH=/usr/sbin:$PATH'),
                        'lsof',
                        run.Raw(';'),
                        'ps',
                        'auxf',
                    ])
                    raise e

        if config.get('tmpfs_journal'):
            log.info('tmpfs journal enabled - unmounting tmpfs at /mnt')
            for remote, roles_for_host in osds.remotes.iteritems():
                remote.run(
                    args=['sudo', 'umount', '-f', '/mnt'],
                    check_status=False,
                )

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):

            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

        log.info('Cleaning ceph cluster...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo',
                    'rm',
                    '-rf',
                    '--',
                    conf_path,
                    keyring_path,
                    '{tdir}/data'.format(tdir=testdir),
                    '{tdir}/monmap'.format(tdir=testdir),
                ],
                wait=False,
            ), )
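
The cluster-log check above shells out to "sudo egrep PATTERN /var/log/ceph/ceph.log", pipes the result through one "egrep -v" per whitelist entry, and keeps only the first surviving line. A rough local sketch of the same filtering logic (an illustration, not the teuthology helper; the path and whitelist in the usage comment are placeholders):

import re

def first_in_log(path, pattern, excludes):
    """Return the first line matching pattern and none of the excludes, else None."""
    pat = re.compile(pattern)
    exc = [re.compile(e) for e in excludes]
    with open(path) as f:
        for line in f:
            if pat.search(line) and not any(e.search(line) for e in exc):
                return line.rstrip('\n')
    return None

# hypothetical usage mirroring the check in the finally block:
# first_in_log('/var/log/ceph/ceph.log', r'\[ERR\]|\[WRN\]|\[SEC\]', [r'slow request'])
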
Code example #34
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of a daemon.
    On startup -- set coverages, cpu_profile, valgrind values for all remotes,
    and a max_mds value for one mds.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))

    # check whether any daemons of this type are configured
    if daemons is None:
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
            ]
            run_cmd_tail = ['ceph-%s' % (type_), '-f', '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (
                    type_, id_)
                run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name, run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(
                remote,
                type_,
                id_,
                args=run_cmd,
                logger=log.getChild(name),
                stdin=run.PIPE,
                wait=False,
            )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
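
The valgrind lookup in run_daemon lets a per-daemon entry (for example 'osd.0') override a per-type entry ('osd'), because the name check runs after the type check. A minimal sketch of that precedence, assuming a plain dict in place of the real task config:

def pick_valgrind_args(valgrind_cfg, type_, id_):
    """Return the most specific valgrind args: daemon name wins over role type."""
    name = '%s.%s' % (type_, id_)
    args = None
    if type_ in valgrind_cfg:
        args = valgrind_cfg[type_]
    if name in valgrind_cfg:
        args = valgrind_cfg[name]
    return args

# osd.0 gets its own flags, every other osd falls back to the type-level default
cfg = {'osd': ['--tool=memcheck'], 'osd.0': ['--tool=helgrind']}
assert pick_valgrind_args(cfg, 'osd', '0') == ['--tool=helgrind']
assert pick_valgrind_args(cfg, 'osd', '1') == ['--tool=memcheck']
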
Code example #35
File: thrashosds.py Project: wujie1993/ceph
def task(ctx, config):
    """
    "Thrash" the OSDs by randomly marking them out/down (and then back
    in) until the task is ended. This loops, and every op_delay
    seconds it randomly chooses to add or remove an OSD (even odds)
    unless there are fewer than min_out OSDs out of the cluster, or
    more than min_in OSDs in the cluster.

    All commands are run on mon0 and it stops when __exit__ is called.

    The config is optional, and is a dict containing some or all of:

    cluster: (default 'ceph') the name of the cluster to thrash

    min_in: (default 3) the minimum number of OSDs to keep in the
       cluster

    min_out: (default 0) the minimum number of OSDs to keep out of the
       cluster

    op_delay: (5) the length of time to sleep between changing an
       OSD's status

    min_dead: (0) minimum number of osds to leave down/dead.

    max_dead: (0) maximum number of osds to leave down/dead before waiting
       for clean.  This should probably be num_replicas - 1.

    clean_interval: (60) the approximate length of time to loop before
       waiting until the cluster goes clean. (In reality this is used
       to probabilistically choose when to wait, and the method used
       makes it closer to -- but not identical to -- the half-life.)

    scrub_interval: (-1) the approximate length of time to loop before
       waiting until a scrub is performed while cleaning. (In reality
       this is used to probabilistically choose when to wait, and it
       only applies to the cases where cleaning is being performed).
       -1 is used to indicate that no scrubbing will be done.

    chance_down: (0.4) the probability that the thrasher will mark an
       OSD down rather than marking it out. (The thrasher will not
       consider that OSD out of the cluster, since presently an OSD
       wrongly marked down will mark itself back up again.) This value
       can be either an integer (eg, 75) or a float probability (eg
       0.75).

    chance_test_min_size: (0) chance to run test_pool_min_size,
       which:
       - kills all but one osd
       - waits
       - kills that osd
       - revives all other osds
       - verifies that the osds fully recover

    timeout: (360) the number of seconds to wait for the cluster
       to become clean after each cluster change. If this doesn't
       happen within the timeout, an exception will be raised.

    revive_timeout: (150) number of seconds to wait for an osd asok to
       appear after attempting to revive the osd

    thrash_primary_affinity: (true) randomly adjust primary-affinity

    chance_pgnum_grow: (0) chance to increase a pool's size
    chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
    pool_grow_by: (10) amount to increase pgnum by
    max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd

    pause_short: (3) duration of short pause
    pause_long: (80) duration of long pause
    pause_check_after: (50) assert osd down after this long
    chance_inject_pause_short: (1) chance of injecting short stall
    chance_inject_pause_long: (0) chance of injecting long stall

    clean_wait: (0) duration to wait before resuming thrashing once clean

    sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
                  random live osd

    powercycle: (false) whether to power cycle the node instead
        of just the osd process. Note that this assumes that a single
        osd is the only important process on the node.

    bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
        the delay lets the BlockDevice "accept" more aio operations but blocks
        any flush, and then eventually crashes (losing some or all ios).  If 0,
        no bdev failure injection is enabled.

    bdev_inject_crash_probability: (.5) probability of doing a bdev failure
        injection crash vs a normal OSD kill.

    chance_test_backfill_full: (0) chance to simulate full disks stopping
        backfill

    chance_test_map_discontinuity: (0) chance to test map discontinuity
    map_discontinuity_sleep_time: (40) time to wait for map trims

    ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
    chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)

    optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
                  enablement to all osds

    dump_ops_enable: (true) continuously dump ops on all live osds

    noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub

    disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
                                    tests

    chance_thrash_cluster_full: .05

    chance_thrash_pg_upmap: 1.0
    chance_thrash_pg_upmap_items: 1.0

    example:

    tasks:
    - ceph:
    - thrashosds:
        cluster: ceph
        chance_down: 10
        op_delay: 3
        min_in: 1
        timeout: 600
    - interactive:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    # add default value for sighup_delay
    config['sighup_delay'] = config.get('sighup_delay', 0.1)
    # add default value for optrack_toggle_delay
    config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
    # add default value for dump_ops_enable
    config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
    # add default value for noscrub_toggle_delay
    config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)

    log.info("config is {config}".format(config=str(config)))

    overrides = ctx.config.get('overrides', {})
    log.info("overrides is {overrides}".format(overrides=str(overrides)))
    teuthology.deep_merge(config, overrides.get('thrashosds', {}))
    cluster = config.get('cluster', 'ceph')

    log.info("config is {config}".format(config=str(config)))

    if 'powercycle' in config:

        # sync everyone first to avoid collateral damage to / etc.
        log.info('Doing preliminary sync to avoid collateral damage...')
        ctx.cluster.run(args=['sync'])

        if 'ipmi_user' in ctx.teuthology_config:
            for remote in ctx.cluster.remotes.keys():
                log.debug('checking console status of %s' % remote.shortname)
                if not remote.console.check_status():
                    log.warn('Failed to get console status for %s',
                             remote.shortname)

            # check that all osd remotes have a valid console
            osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
            for remote in osds.remotes.keys():
                if not remote.console.has_ipmi_credentials:
                    raise Exception(
                        'IPMI console required for powercycling, '
                        'but not available on osd role: {r}'.format(
                            r=remote.name))

    cluster_manager = ctx.managers[cluster]
    for f in ['powercycle', 'bdev_inject_crash']:
        if config.get(f):
            cluster_manager.config[f] = config.get(f)

    log.info('Beginning thrashosds...')
    thrash_proc = ceph_manager.Thrasher(cluster_manager,
                                        config,
                                        logger=log.getChild('thrasher'))
    try:
        yield
    finally:
        log.info('joining thrashosds')
        thrash_proc.do_join()
        cluster_manager.wait_for_all_up()
        cluster_manager.flush_all_pg_stats()
        cluster_manager.wait_for_recovery(config.get('timeout', 360))
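
thrashosds fills in a handful of defaults with config.get and then folds any 'overrides: thrashosds:' section from ctx.config into the task config with teuthology.deep_merge, so an override always wins over both the task config and the defaults. A rough sketch of that merge behaviour (an illustration of the idea, not teuthology's implementation):

def deep_merge(base, overrides):
    """Recursively copy overrides into base; override values win on conflicts."""
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            deep_merge(base[key], value)
        else:
            base[key] = value
    return base

config = {'chance_down': 10, 'op_delay': 3}
config['sighup_delay'] = config.get('sighup_delay', 0.1)   # default only if unset
deep_merge(config, {'op_delay': 5, 'min_in': 2})           # override wins
assert config == {'chance_down': 10, 'op_delay': 5, 'sighup_delay': 0.1, 'min_in': 2}
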
Code example #36
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.keys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                        n=remote.shortname,
                        d=dev_needed,
                        a=all_devs,
                        ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError("No device available for \
                                            journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create osd's
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.items():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
                ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
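
ceph_volume_osd_create above assumes the scratch devices are LVM logical volumes and rewrites a path such as /dev/vg_nvme/lv_1 into the 'vg/lv' form passed to --data. That conversion and the default (bluestore) command assembly, isolated as a sketch (the host and device names are made up):

def to_vg_lv(device_path):
    """Keep the last two path components: '/dev/<vg>/<lv>' -> '<vg>/<lv>'."""
    parts = device_path.split('/')
    return parts[-2] + '/' + parts[-1]

def bluestore_create_cmd(host, device_path):
    """Assemble the ceph-deploy invocation used for the default bluestore case."""
    return './ceph-deploy osd create --debug ' + host + ' --data ' + to_vg_lv(device_path)

assert to_vg_lv('/dev/vg_nvme/lv_1') == 'vg_nvme/lv_1'
# e.g. bluestore_create_cmd('smithi001', '/dev/vg_nvme/lv_1')
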
Code example #37
File: ceph_deploy.py Project: LiumxNL/ceph-qa-suite
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it.  Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.
    (ceph_admin, ) = ctx.cluster.only(teuthology.get_first_mon(
        ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin,
                                                conf_path,
                                                lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(ceph_admin,
                                                    conf_path,
                                                    lines,
                                                    sudo=True)

        # install ceph
        install_nodes = './ceph-deploy install ' + \
            (ceph_branch if ceph_branch else "--dev=master") + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + \
            (ceph_branch if ceph_branch else "--dev=master") + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while (estatus_gather != 0):
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote, ) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo',
                        'bash',
                        '-c',
                        run.Raw('"'),
                        'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds',
                        'allow',
                        'mon',
                        'allow *',
                        'osd',
                        'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ], )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data,
                                               perms='0644')

            if mds_nodes:
                log.info('Configuring CephFS...')
                ceph_fs = Filesystem(ctx,
                                     admin_remote=clients.remotes.keys()[0])
                if not ceph_fs.legacy_configured():
                    ceph_fs.create()
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy"
        )
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all',
            run.Raw('||'), 'sudo', 'service', 'ceph', 'stop',
            run.Raw('||'), 'sudo', 'systemctl', 'stop', 'ceph.target'
        ])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(args=[
            'sudo', 'status', 'ceph-all',
            run.Raw('||'), 'sudo', 'service', 'ceph', 'status',
            run.Raw('||'), 'sudo', 'systemctl', 'status', 'ceph.target'
        ],
                        check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=[
            'sudo', 'ps', 'aux',
            run.Raw('|'), 'grep', '-v', 'grep',
            run.Raw('|'), 'grep', 'ceph'
        ],
                        check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ), )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
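
The gatherkeys block in this variant re-runs the command up to 90 times with a 10-second sleep, which is where the "wait up to 15 minutes" comment comes from. The same retry-until-zero-exit pattern as a reusable sketch (run_cmd is a stand-in for execute_ceph_deploy):

import time

def retry_until_success(run_cmd, cmd, tries=90, delay=10):
    """Re-run cmd until it exits 0 or the retry budget is exhausted."""
    status = run_cmd(cmd)
    attempts = 0
    while status != 0:
        attempts += 1
        if attempts >= tries:
            raise RuntimeError('%r did not succeed after %d attempts' % (cmd, tries))
        time.sleep(delay)
        status = run_cmd(cmd)
    return status
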
Code example #38
def run_daemon(ctx, config, type_):
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    num_active = 0
    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            if not (id_.endswith('-s')) and (id_.find('-s-') == -1):
                num_active += 1

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
                ]
            run_cmd_tail = [
                'ceph-%s' % (type_),
                '-f',
                '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_)
                run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name,
                                                       run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(remote, type_, id_,
                                   args=run_cmd,
                                   logger=log.getChild(name),
                                   stdin=run.PIPE,
                                   wait=False,
                                   )

    if type_ == 'mds':
        firstmon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()

        mon0_remote.run(args=[
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph',
            'mds', 'set_max_mds', str(num_active)])

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
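
In this older run_daemon variant the active MDS count skips any id that ends in '-s' or contains '-s-', the naming convention for standby daemons, and the resulting number is handed to 'ceph mds set_max_mds'. The filter on its own, as a small sketch:

def count_active(mds_ids):
    """Count MDS ids, skipping standby-style names such as 'a-s' or 'b-s-0'."""
    return sum(1 for id_ in mds_ids
               if not id_.endswith('-s') and id_.find('-s-') == -1)

assert count_active(['a', 'b', 'a-s', 'b-s-0']) == 2
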
Code example #39
def task(ctx, config):
    """
    Run ceph_objectstore_tool test

    The config should be as follows::

        ceph_objectstore_tool:
          objects: 20 # <number of objects>
          pgnum: 12
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'ceph_objectstore_tool task only accepts a dict for configuration'

    log.info('Beginning ceph_objectstore_tool...')

    log.debug(config)
    log.debug(ctx)
    clients = ctx.cluster.only(teuthology.is_type('client'))
    assert len(clients.remotes) > 0, 'Must specify at least 1 client'
    (cli_remote, _) = clients.remotes.popitem()
    log.debug(cli_remote)

    # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys()))
    # client = clients.popitem()
    # log.info(client)
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    log.info("OSDS")
    log.info(osds)
    log.info(osds.remotes)

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon, ) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        config=config,
        logger=log.getChild('ceph_manager'),
    )
    ctx.manager = manager

    while (len(manager.get_osd_status()['up']) != len(
            manager.get_osd_status()['raw'])):
        time.sleep(10)
    while (len(manager.get_osd_status()['in']) != len(
            manager.get_osd_status()['up'])):
        time.sleep(10)
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')

    PGNUM = config.get('pgnum', 12)
    log.info("pgnum: {num}".format(num=PGNUM))

    ERRORS = 0

    REP_POOL = "rep_pool"
    REP_NAME = "REPobject"
    create_replicated_pool(cli_remote, REP_POOL, PGNUM)
    ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME)

    EC_POOL = "ec_pool"
    EC_NAME = "ECobject"
    create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM)
    ERRORS += test_objectstore(ctx,
                               config,
                               cli_remote,
                               EC_POOL,
                               EC_NAME,
                               ec=True)

    if ERRORS == 0:
        log.info("TEST PASSED")
    else:
        log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS))

    assert ERRORS == 0

    try:
        yield
    finally:
        log.info('Ending ceph_objectstore_tool')
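
The two polling loops above spin until every OSD reported by the manager is up and in, sleeping ten seconds between checks and with no upper bound. A bounded variant of the same wait, sketched with a generic status callable standing in for manager.get_osd_status:

import time

def wait_for_all_osds_up_and_in(get_status, timeout=300, interval=10):
    """Poll an osd-status callable until up == raw and in == up, or give up at timeout."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        status = get_status()  # expected to return a dict with 'raw', 'up' and 'in' lists
        if len(status['up']) == len(status['raw']) and \
                len(status['in']) == len(status['up']):
            return
        time.sleep(interval)
    raise RuntimeError('OSDs did not become up/in within %d seconds' % timeout)
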
Code example #40
File: ceph-deploy.py Project: AsherBond/teuthology
def build_ceph_cluster(ctx, config):
    log.info('Building ceph cluster using ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get('branch') is not None:
        cbranch = config.get('branch')
        for var, val in cbranch.iteritems():
            if var == 'testing':
                ceph_branch = '--{var}'.format(var=var)
            else:
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, 'mon')
    mon_nodes = " ".join(mon_node)
    new_mon = './ceph-deploy new' + " " + mon_nodes
    install_nodes = './ceph-deploy install ' + ceph_branch + " " + all_nodes
    purge_nodes = './ceph-deploy purge' + " " + all_nodes
    purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
    mon_hostname = mon_nodes.split(' ')[0]
    mon_hostname = str(mon_hostname)
    gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
    deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info('adding config inputs...')
    testdir = teuthology.get_testdir(ctx)
    conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote, ) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(remote,
                                            conf_path,
                                            lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(remote,
                                                conf_path,
                                                lines,
                                                sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get('mon_initial_members')
    if mon_no is not None:
        i = 0
        mon1 = []
        while (i < mon_no):
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = './ceph-deploy mon create' + " " + initial_mons + " " + mon_node[
                k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = './ceph-deploy mon create' + " " + mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while (estatus_gather != 0):
        #mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        #execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get('test_mon_destroy') is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = './ceph-deploy mon destroy' + " " + mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = './ceph-deploy osd create --zap-disk' + " " + d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info('successfully created osd')
            no_of_osds += 1
        else:
            zap_disk = './ceph-deploy disk zap' + " " + d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get('wait-for-healthy', True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote, ) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
        )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
        )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(
                    id=id_)
                mon0_remote.run(args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo',
                    'bash',
                    '-c',
                    run.Raw('"'),
                    'ceph',
                    'auth',
                    'get-or-create',
                    'client.{id}'.format(id=id_),
                    'mds',
                    'allow',
                    'mon',
                    'allow *',
                    'osd',
                    'allow *',
                    run.Raw('>'),
                    client_keyring,
                    run.Raw('"'),
                ], )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                )
                teuthology.sudo_write_file(remote=remot,
                                           path=client_keyring,
                                           data=key_data,
                                           perms='0644')
                teuthology.sudo_write_file(remote=remot,
                                           path=admin_keyring_path,
                                           data=admin_keyring,
                                           perms='0644')
                teuthology.sudo_write_file(remote=remot,
                                           path=conf_path,
                                           data=conf_data,
                                           perms='0644')
    else:
        raise RuntimeError(
            "The cluster is NOT operational due to insufficient OSDs")

    try:
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all',
            run.Raw('||'), 'sudo', 'service', 'ceph', 'stop'
        ])

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ), )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
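
The OSD creation loop in this variant first tries './ceph-deploy osd create --zap-disk'; on a non-zero exit it zaps the disk explicitly and retries once before giving up. That zap-and-retry shape in isolation (execute stands in for execute_ceph_deploy, node_disk for an entry of node_dev_list):

def create_osd_with_retry(execute, node_disk):
    """Try osd create once; on failure zap the disk and retry a single time."""
    create = './ceph-deploy osd create --zap-disk ' + node_disk
    if execute(create) == 0:
        return True
    execute('./ceph-deploy disk zap ' + node_disk)
    if execute(create) == 0:
        return True
    raise RuntimeError("ceph-deploy: Failed to create osds")
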
Code example #41
File: cephadm.py Project: perrynzhou/ceph-note
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        daemons = {}
        if config.get('add_mons_via_daemon_add'):
            # This is the old way of adding mons that works with the (early) octopus
            # cephadm scheduler.
            num_mons = 1
            for remote, roles in ctx.cluster.remotes.items():
                for mon in [
                        r for r in roles
                        if teuthology.is_type('mon', cluster_name)(r)
                ]:
                    c_, _, id_ = teuthology.split_role(mon)
                    if c_ == cluster_name and id_ == ctx.ceph[
                            cluster_name].first_mon:
                        continue
                    log.info('Adding %s on %s' % (mon, remote.shortname))
                    num_mons += 1
                    _shell(ctx, cluster_name, remote, [
                        'ceph',
                        'orch',
                        'daemon',
                        'add',
                        'mon',
                        remote.shortname + ':' +
                        ctx.ceph[cluster_name].mons[mon] + '=' + id_,
                    ])
                    ctx.daemons.register_daemon(
                        remote,
                        'mon',
                        id_,
                        cluster=cluster_name,
                        fsid=fsid,
                        logger=log.getChild(mon),
                        wait=False,
                        started=True,
                    )
                    daemons[mon] = (remote, id_)

                    with contextutil.safe_while(sleep=1, tries=180) as proceed:
                        while proceed():
                            log.info('Waiting for %d mons in monmap...' %
                                     (num_mons))
                            r = _shell(
                                ctx=ctx,
                                cluster_name=cluster_name,
                                remote=remote,
                                args=[
                                    'ceph',
                                    'mon',
                                    'dump',
                                    '-f',
                                    'json',
                                ],
                                stdout=StringIO(),
                            )
                            j = json.loads(r.stdout.getvalue())
                            if len(j['mons']) == num_mons:
                                break
        else:
            nodes = []
            for remote, roles in ctx.cluster.remotes.items():
                for mon in [
                        r for r in roles
                        if teuthology.is_type('mon', cluster_name)(r)
                ]:
                    c_, _, id_ = teuthology.split_role(mon)
                    log.info('Adding %s on %s' % (mon, remote.shortname))
                    nodes.append(remote.shortname + ':' +
                                 ctx.ceph[cluster_name].mons[mon] + '=' + id_)
                    if c_ == cluster_name and id_ == ctx.ceph[
                            cluster_name].first_mon:
                        continue
                    daemons[mon] = (remote, id_)

            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'apply', 'mon',
                str(len(nodes)) + ';' + ';'.join(nodes)
            ])
            for mon, i in daemons.items():
                remote, id_ = i
                ctx.daemons.register_daemon(
                    remote,
                    'mon',
                    id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

            with contextutil.safe_while(sleep=1, tries=180) as proceed:
                while proceed():
                    log.info('Waiting for %d mons in monmap...' % (len(nodes)))
                    r = _shell(
                        ctx=ctx,
                        cluster_name=cluster_name,
                        remote=remote,
                        args=[
                            'ceph',
                            'mon',
                            'dump',
                            '-f',
                            'json',
                        ],
                        stdout=StringIO(),
                    )
                    j = json.loads(r.stdout.getvalue())
                    if len(j['mons']) == len(nodes):
                        break

        # refresh our (final) ceph.conf file
        bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
        log.info('Generating final ceph.conf file...')
        r = _shell(
            ctx=ctx,
            cluster_name=cluster_name,
            remote=bootstrap_remote,
            args=[
                'ceph',
                'config',
                'generate-minimal-conf',
            ],
            stdout=StringIO(),
        )
        ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

        yield

    finally:
        pass
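
The 'ceph orch apply mon' call above hands cephadm a placement spec of the form '<count>;<host>:<addr>=<id>;...'. A small self-contained sketch of how that string is assembled from a host-to-(address, mon id) mapping; the host names and addresses below are made up for illustration:

def mon_placement_spec(mons):
    """Build the '<count>;host:addr=id;...' spec consumed by
    'ceph orch apply mon' in the example above."""
    nodes = ['{host}:{addr}={mon_id}'.format(host=h, addr=a, mon_id=i)
             for h, (a, i) in sorted(mons.items())]
    return str(len(nodes)) + ';' + ';'.join(nodes)

# Example with made-up hosts/addresses:
spec = mon_placement_spec({
    'smithi001': ('172.21.0.1', 'a'),
    'smithi002': ('172.21.0.2', 'b'),
})
# spec == '2;smithi001:172.21.0.1=a;smithi002:172.21.0.2=b'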
コード例 #42
0
ファイル: cbt.py プロジェクト: zjia03/ceph14.2.10
def hosts_of_type(self, type_):
    return [
        r.name
        for r in self.ctx.cluster.only(misc.is_type(type_)).remotes.keys()
    ]
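
misc.is_type(type_) returns a predicate that matches roles such as 'osd.3' or '<cluster>.osd.3'. A rough standalone approximation of that filtering, for illustration only (the real teuthology helper handles more cases):

def is_type(type_, cluster='ceph'):
    """Return a predicate matching roles like 'osd.0' or '<cluster>.osd.0'."""
    def matcher(role):
        parts = role.split('.')
        if len(parts) == 2:          # e.g. 'osd.0'
            return parts[0] == type_
        if len(parts) == 3:          # e.g. 'ceph.osd.0'
            return parts[0] == cluster and parts[1] == type_
        return False
    return matcher

roles = ['mon.a', 'osd.0', 'ceph.osd.1', 'client.0']
print([r for r in roles if is_type('osd')(r)])   # ['osd.0', 'ceph.osd.1']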
コード例 #43
0
def upgrade(ctx, config):
    """
     Upgrade using ceph-deploy
     eg:
       ceph-deploy.upgrade:
          # to upgrade to specific branch, use
          branch:
             stable: jewel
           # to setup mgr node, use
           setup-mgr-node: True
           # to wait for cluster to be healthy after all upgrade, use
           wait-for-healthy: True
           role: (upgrades the below roles serially)
              mon.a
              mon.b
              osd.0
     """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a  => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = branch.items()[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to wip-branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for initial deployment which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.items():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on  %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally the upgrade should do this, but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.items():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
                ],
            )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
                ],
            )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield
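
The upgrade loop above folds one short hostname per role onto a single './ceph-deploy install --dev=<branch> <node> ...' command, resolving mapped mon/mds roles first. A minimal sketch of that command assembly; the role-to-host mappings here are hypothetical:

def build_install_cmd(branch, roles, mapped_role, role_to_host):
    """Mimic the command construction above: '--dev=<branch>' plus one
    short hostname per (possibly remapped) role."""
    cmd = './ceph-deploy install --dev={branch}'.format(branch=branch)
    for role in roles:
        role = mapped_role.get(role, role)      # e.g. mon.a -> mon.<short_name>
        cmd += ' ' + role_to_host[role]
    return cmd

# Example with placeholder mappings:
print(build_install_cmd(
    'jewel',
    ['mon.a', 'osd.0'],
    {'mon.a': 'mon.smithi001'},
    {'mon.smithi001': 'smithi001', 'osd.0': 'smithi002'},
))
# ./ceph-deploy install --dev=jewel smithi001 smithi002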
コード例 #44
0
def cli_test(ctx, config):
    """
     ceph-deploy cli to exercise most commonly use cli's and ensure
     all commands works and also startup the init system.

    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute ceph-deploy commands """
        """Either use git path or repo path """
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.items():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.items():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.items():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
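
The health wait in cli_test retries 'ceph health' until the first whitespace-separated token of its output is HEALTH_OK. A self-contained sketch of that check-and-retry loop, where run_health stands in for the remote command:

import time

def wait_for_health_ok(run_health, tries=6, sleep=10):
    """Poll a 'ceph health'-style command until it reports HEALTH_OK."""
    for _ in range(tries):
        out = run_health()
        if out.split(None, 1)[0] == 'HEALTH_OK':
            return True
        time.sleep(sleep)
    return False

# Example with canned output instead of a real cluster:
outputs = iter(['HEALTH_WARN clock skew', 'HEALTH_OK'])
print(wait_for_health_ok(lambda: next(outputs), sleep=0))   # True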
コード例 #45
0
ファイル: rest-api.py プロジェクト: AsherBond/teuthology
def task(ctx, config):
    """
    Start up rest-api.

    To start on all clients::

        tasks:
        - ceph:
        - rest-api:

    To only run on certain clients::

        tasks:
        - ceph:
        - rest-api: [client.0, client.3]

    or

        tasks:
        - ceph:
        - rest-api:
            client.0:
            client.3:

    The general flow of things here is:
        1. Find clients on which rest-api is supposed to run (api_clients)
        2. Generate keyring values
        3. Start up ceph-rest-api daemons
    On cleanup:
        4. Stop the daemons
        5. Delete keyring value files.
    """
    api_clients = []
    remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
    log.info(remotes)
    if config is None:
        api_clients = [
            'client.{id}'.format(id=id_)
            for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')
        ]
    else:
        api_clients = config
    log.info(api_clients)
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for rems, roles in remotes.iteritems():
        for whole_id_ in roles:
            if whole_id_ in api_clients:
                id_ = whole_id_[len('client.'):]
                keyring = '/etc/ceph/ceph.client.rest{id}.keyring'.format(
                    id=id_)
                rems.run(args=[
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    coverage_dir,
                    'ceph-authtool',
                    '--create-keyring',
                    '--gen-key',
                    '--name=client.rest{id}'.format(id=id_),
                    '--set-uid=0',
                    '--cap',
                    'mon',
                    'allow *',
                    '--cap',
                    'osd',
                    'allow *',
                    '--cap',
                    'mds',
                    'allow',
                    keyring,
                    run.Raw('&&'),
                    'sudo',
                    'chmod',
                    '0644',
                    keyring,
                ], )
                rems.run(args=[
                    'sudo', 'sh', '-c',
                    run.Raw("'"), "echo", '[client.rest{id}]'.format(id=id_),
                    run.Raw('>>'), "/etc/ceph/ceph.conf",
                    run.Raw("'")
                ])
                rems.run(args=[
                    'sudo',
                    'sh',
                    '-c',
                    run.Raw("'"),
                    'echo',
                    'restapi',
                    'keyring',
                    '=',
                    '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_),
                    run.Raw('>>'),
                    '/etc/ceph/ceph.conf',
                    run.Raw("'"),
                ])
                rems.run(args=[
                    'ceph',
                    'auth',
                    'import',
                    '-i',
                    '/etc/ceph/ceph.client.rest{id}.keyring'.format(id=id_),
                ])
    with contextutil.nested(
            lambda: run_rest_api_daemon(ctx=ctx, api_clients=api_clients), ):
        yield
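
The rest-api task accepts its client list as None (meaning all clients), a list, or a dict keyed by client role. A simplified standalone sketch of that normalization, independent of teuthology; the role ids are placeholders:

def normalize_api_clients(config, all_client_ids):
    """Return the list of 'client.<id>' roles rest-api should run on."""
    if config is None:
        return ['client.{id}'.format(id=i) for i in all_client_ids]
    if isinstance(config, dict):
        return list(config.keys())
    return list(config)

print(normalize_api_clients(None, ['0', '3']))          # ['client.0', 'client.3']
print(normalize_api_clients(['client.0'], ['0', '3']))  # ['client.0']
print(normalize_api_clients({'client.3': None}, []))    # ['client.3']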
コード例 #46
0
def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
    manager = ctx.manager

    osds = ctx.cluster.only(teuthology.is_type('osd'))

    TEUTHDIR = teuthology.get_testdir(ctx)
    DATADIR = os.path.join(TEUTHDIR, "data")
    DATALINECOUNT = 10000
    ERRORS = 0
    NUM_OBJECTS = config.get('objects', 10)
    log.info("objects: {num}".format(num=NUM_OBJECTS))

    pool_dump = manager.get_pool_dump(REP_POOL)
    REPID = pool_dump['pool']

    log.debug("repid={num}".format(num=REPID))

    db = {}

    LOCALDIR = tempfile.mkdtemp("cod")

    cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, REP_NAME,
                         DATALINECOUNT)
    allremote = []
    allremote.append(cli_remote)
    allremote += osds.remotes.keys()
    allremote = list(set(allremote))
    for remote in allremote:
        cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, REP_NAME,
                              DATALINECOUNT)

    ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, REP_NAME,
                        DATALINECOUNT, REP_POOL, db, ec)

    pgs = {}
    for stats in manager.get_pg_stats():
        if stats["pgid"].find(str(REPID) + ".") != 0:
            continue
        if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append(stats["pgid"])
        elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL:
            shard = 0
            for osd in stats["acting"]:
                pgs.setdefault(osd, []).append("{pgid}s{shard}".format(
                    pgid=stats["pgid"], shard=shard))
                shard += 1
        else:
            raise Exception("{pool} has an unexpected type {type}".format(
                pool=REP_POOL, type=pool_dump["type"]))

    log.info(pgs)
    log.info(db)

    for osd in manager.get_osd_status()['up']:
        manager.kill_osd(osd)
    time.sleep(5)

    pgswithobjects = set()
    objsinpg = {}

    # Test --op list and generate json for all objects
    log.info("Test --op list by generating json for all objects")
    prefix = ("sudo ceph-objectstore-tool "
              "--data-path {fpath} "
              "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)
    for remote in osds.remotes.iterkeys():
        log.debug(remote)
        log.debug(osds.remotes[remote])
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            log.info("process osd.{id} on {remote}".format(id=osdid,
                                                           remote=remote))
            cmd = (prefix + "--op list").format(id=osdid)
            proc = remote.run(args=cmd.split(),
                              check_status=False,
                              stdout=StringIO())
            if proc.exitstatus != 0:
                log.error(
                    "Bad exit status {ret} from --op list request".format(
                        ret=proc.exitstatus))
                ERRORS += 1
            else:
                for pgline in proc.stdout.getvalue().splitlines():
                    if not pgline:
                        continue
                    (pg, obj) = json.loads(pgline)
                    name = obj['oid']
                    if name in db:
                        pgswithobjects.add(pg)
                        objsinpg.setdefault(pg, []).append(name)
                        db[name].setdefault("pg2json",
                                            {})[pg] = json.dumps(obj)

    log.info(db)
    log.info(pgswithobjects)
    log.info(objsinpg)

    if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
        # Test get-bytes
        log.info("Test get-bytes and set-bytes")
        for basename in db.keys():
            file = os.path.join(DATADIR, basename)
            GETNAME = os.path.join(DATADIR, "get")
            SETNAME = os.path.join(DATADIR, "set")

            for remote in osds.remotes.iterkeys():
                for role in osds.remotes[remote]:
                    if string.find(role, "osd.") != 0:
                        continue
                    osdid = int(role.split('.')[1])
                    if osdid not in pgs:
                        continue

                    for pg, JSON in db[basename]["pg2json"].iteritems():
                        if pg in pgs[osdid]:
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-bytes {fname}".format(
                                fname=GETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            if proc.exitstatus != 0:
                                remote.run(args="rm -f {getfile}".format(
                                    getfile=GETNAME).split())
                                log.error("Bad exit status {ret}".format(
                                    ret=proc.exitstatus))
                                ERRORS += 1
                                continue
                            cmd = ("diff -q {file} {getfile}".format(
                                file=file, getfile=GETNAME))
                            proc = remote.run(args=cmd.split())
                            if proc.exitstatus != 0:
                                log.error("Data from get-bytes differ")
                                # log.debug("Got:")
                                # cat_file(logging.DEBUG, GETNAME)
                                # log.debug("Expected:")
                                # cat_file(logging.DEBUG, file)
                                ERRORS += 1
                            remote.run(args="rm -f {getfile}".format(
                                getfile=GETNAME).split())

                            data = ("put-bytes going into {file}\n".format(
                                file=file))
                            teuthology.write_file(remote, SETNAME, data)
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".format(
                                fname=SETNAME).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info(
                                    "set-bytes failed for object {obj} "
                                    "in pg {pg} osd.{id} ret={ret}".format(
                                        obj=basename,
                                        pg=pg,
                                        id=osdid,
                                        ret=proc.exitstatus))
                                ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += "get-bytes -".split()
                            proc = remote.run(args=cmd,
                                              check_status=False,
                                              stdout=StringIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("get-bytes after "
                                          "set-bytes ret={ret}".format(
                                              ret=proc.exitstatus))
                                ERRORS += 1
                            else:
                                if data != proc.stdout.getvalue():
                                    log.error("Data inconsistent after "
                                              "set-bytes, got:")
                                    log.error(proc.stdout.getvalue())
                                    ERRORS += 1

                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("set-bytes {fname}".format(
                                fname=file).split())
                            proc = remote.run(args=cmd, check_status=False)
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.info(
                                    "set-bytes failed for object {obj} "
                                    "in pg {pg} osd.{id} ret={ret}".format(
                                        obj=basename,
                                        pg=pg,
                                        id=osdid,
                                        ret=proc.exitstatus))
                                ERRORS += 1

    log.info("Test list-attrs get-attr")
    for basename in db.keys():
        file = os.path.join(DATADIR, basename)
        GETNAME = os.path.join(DATADIR, "get")
        SETNAME = os.path.join(DATADIR, "set")

        for remote in osds.remotes.iterkeys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg, JSON in db[basename]["pg2json"].iteritems():
                    if pg in pgs[osdid]:
                        cmd = ((prefix + "--pgid {pg}").format(id=osdid,
                                                               pg=pg).split())
                        cmd.append(run.Raw("'{json}'".format(json=JSON)))
                        cmd += ["list-attrs"]
                        proc = remote.run(args=cmd,
                                          check_status=False,
                                          stdout=StringIO(),
                                          stderr=StringIO())
                        proc.wait()
                        if proc.exitstatus != 0:
                            log.error("Bad exit status {ret}".format(
                                ret=proc.exitstatus))
                            ERRORS += 1
                            continue
                        keys = proc.stdout.getvalue().split()
                        values = dict(db[basename]["xattr"])

                        for key in keys:
                            if (key == "_" or key == "snapset"
                                    or key == "hinfo_key"):
                                continue
                            key = key.strip("_")
                            if key not in values:
                                log.error(
                                    "The key {key} should be present".format(
                                        key=key))
                                ERRORS += 1
                                continue
                            exp = values.pop(key)
                            cmd = ((prefix + "--pgid {pg}").format(
                                id=osdid, pg=pg).split())
                            cmd.append(run.Raw("'{json}'".format(json=JSON)))
                            cmd += ("get-attr {key}".format(key="_" +
                                                            key).split())
                            proc = remote.run(args=cmd,
                                              check_status=False,
                                              stdout=StringIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("get-attr failed with {ret}".format(
                                    ret=proc.exitstatus))
                                ERRORS += 1
                                continue
                            val = proc.stdout.getvalue()
                            if exp != val:
                                log.error("For key {key} got value {got} "
                                          "instead of {expected}".format(
                                              key=key, got=val, expected=exp))
                                ERRORS += 1
                        if "hinfo_key" in keys:
                            cmd_prefix = prefix.format(id=osdid)
                            cmd = """
      expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64)
      echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder
      echo $expected | base64 --decode | \
         {prefix} --pgid {pg} '{json}' set-attr {key} -
      test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected
                            """.format(prefix=cmd_prefix,
                                       pg=pg,
                                       json=JSON,
                                       key="hinfo_key")
                            log.debug(cmd)
                            proc = remote.run(
                                args=['bash', '-e', '-x', '-c', cmd],
                                check_status=False,
                                stdout=StringIO(),
                                stderr=StringIO())
                            proc.wait()
                            if proc.exitstatus != 0:
                                log.error("failed with " +
                                          str(proc.exitstatus))
                                log.error(proc.stdout.getvalue() + " " +
                                          proc.stderr.getvalue())
                                ERRORS += 1

                        if len(values) != 0:
                            log.error("Not all keys found, remaining keys:")
                            log.error(values)

    log.info("Test pg info")
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op info --pgid {pg}").format(
                    id=osdid, pg=pg).split())
                proc = remote.run(args=cmd,
                                  check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Failure of --op info command with {ret}".format(
                        proc.exitstatus))
                    ERRORS += 1
                    continue
                info = proc.stdout.getvalue()
                if not str(pg) in info:
                    log.error("Bad data from info: {info}".format(info=info))
                    ERRORS += 1

    log.info("Test pg logging")
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op log --pgid {pg}").format(id=osdid,
                                                                pg=pg).split())
                proc = remote.run(args=cmd,
                                  check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Getting log failed for pg {pg} "
                              "from osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    ERRORS += 1
                    continue
                HASOBJ = pg in pgswithobjects
                MODOBJ = "modify" in proc.stdout.getvalue()
                if HASOBJ != MODOBJ:
                    log.error("Bad log for pg {pg} from osd.{id}".format(
                        pg=pg, id=osdid))
                    MSG = (HASOBJ and [""] or ["NOT "])[0]
                    log.error(
                        "Log should {msg}have a modify entry".format(msg=MSG))
                    ERRORS += 1

    log.info("Test pg export")
    EXP_ERRORS = 0
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                fpath = os.path.join(DATADIR, "osd{id}.{pg}".format(id=osdid,
                                                                    pg=pg))

                cmd = ((prefix +
                        "--op export --pgid {pg} --file {file}").format(
                            id=osdid, pg=pg, file=fpath))
                proc = remote.run(args=cmd,
                                  check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Exporting failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    EXP_ERRORS += 1

    ERRORS += EXP_ERRORS

    log.info("Test pg removal")
    RM_ERRORS = 0
    for remote in osds.remotes.iterkeys():
        for role in osds.remotes[remote]:
            if string.find(role, "osd.") != 0:
                continue
            osdid = int(role.split('.')[1])
            if osdid not in pgs:
                continue

            for pg in pgs[osdid]:
                cmd = ((prefix + "--op remove --pgid {pg}").format(pg=pg,
                                                                   id=osdid))
                proc = remote.run(args=cmd,
                                  check_status=False,
                                  stdout=StringIO())
                proc.wait()
                if proc.exitstatus != 0:
                    log.error("Removing failed for pg {pg} "
                              "on osd.{id} with {ret}".format(
                                  pg=pg, id=osdid, ret=proc.exitstatus))
                    RM_ERRORS += 1

    ERRORS += RM_ERRORS

    IMP_ERRORS = 0
    if EXP_ERRORS == 0 and RM_ERRORS == 0:
        log.info("Test pg import")

        for remote in osds.remotes.iterkeys():
            for role in osds.remotes[remote]:
                if string.find(role, "osd.") != 0:
                    continue
                osdid = int(role.split('.')[1])
                if osdid not in pgs:
                    continue

                for pg in pgs[osdid]:
                    fpath = os.path.join(
                        DATADIR, "osd{id}.{pg}".format(id=osdid, pg=pg))

                    cmd = ((prefix + "--op import --file {file}").format(
                        id=osdid, file=fpath))
                    proc = remote.run(args=cmd,
                                      check_status=False,
                                      stdout=StringIO())
                    proc.wait()
                    if proc.exitstatus != 0:
                        log.error(
                            "Import failed from {file} with {ret}".format(
                                file=fpath, ret=proc.exitstatus))
                        IMP_ERRORS += 1
    else:
        log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")

    ERRORS += IMP_ERRORS

    if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
        log.info("Restarting OSDs....")
        # They still appear to be up because the nodown flag is set
        for osd in manager.get_osd_status()['up']:
            manager.revive_osd(osd)
        # Wait for health?
        time.sleep(5)
        # Let scrub after test runs verify consistency of all copies
        log.info("Verify replicated import data")
        objects = range(1, NUM_OBJECTS + 1)
        for i in objects:
            NAME = REP_NAME + "{num}".format(num=i)
            TESTNAME = os.path.join(DATADIR, "gettest")
            REFNAME = os.path.join(DATADIR, NAME)

            proc = rados(ctx,
                         cli_remote, ['-p', REP_POOL, 'get', NAME, TESTNAME],
                         wait=False)

            ret = proc.wait()
            if ret != 0:
                log.error("After import, rados get failed with {ret}".format(
                    ret=proc.exitstatus))
                ERRORS += 1
                continue

            cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
                                                   ref=REFNAME)
            proc = cli_remote.run(args=cmd, check_status=False)
            proc.wait()
            if proc.exitstatus != 0:
                log.error("Data comparison failed for {obj}".format(obj=NAME))
                ERRORS += 1

    return ERRORS
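
The pg bookkeeping near the top of test_objectstore maps each OSD id to the PGs it hosts, appending an 's<shard>' suffix for erasure-coded pools. A standalone sketch of that grouping with fabricated pg-stat records (field names mirror the 'ceph pg dump' output used above):

def osd_to_pgs(pg_stats, pool_id, erasure_coded=False):
    """Group pgids by acting OSD, adding 's<shard>' suffixes for EC pools."""
    pgs = {}
    for stats in pg_stats:
        if not stats['pgid'].startswith(str(pool_id) + '.'):
            continue
        for shard, osd in enumerate(stats['acting']):
            pgid = stats['pgid']
            if erasure_coded:
                pgid = '{pgid}s{shard}'.format(pgid=pgid, shard=shard)
            pgs.setdefault(osd, []).append(pgid)
    return pgs

sample = [{'pgid': '1.0', 'acting': [2, 0]}, {'pgid': '1.1', 'acting': [0, 1]}]
print(osd_to_pgs(sample, 1))                       # {2: ['1.0'], 0: ['1.0', '1.1'], 1: ['1.1']}
print(osd_to_pgs(sample, 1, erasure_coded=True))   # same grouping, shard-suffixed pgids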
コード例 #47
0
ファイル: cephadm.py プロジェクト: LargerPanda/ceph-1
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    try:
        for remote, roles in ctx.cluster.remotes.items():
            for mon in [
                    r for r in roles
                    if teuthology.is_type('mon', cluster_name)(r)
            ]:
                c_, _, id_ = teuthology.split_role(mon)
                if c_ == cluster_name and id_ == ctx.ceph[
                        cluster_name].first_mon:
                    continue
                log.info('Adding %s on %s' % (mon, remote.shortname))
                num_mons += 1
                _shell(ctx, cluster_name, remote, [
                    'ceph',
                    'orch',
                    'daemon',
                    'add',
                    'mon',
                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] +
                    '=' + id_,
                ])
                ctx.daemons.register_daemon(
                    remote,
                    'mon',
                    id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

                with contextutil.safe_while(sleep=1, tries=180) as proceed:
                    while proceed():
                        log.info('Waiting for %d mons in monmap...' %
                                 (num_mons))
                        r = _shell(
                            ctx=ctx,
                            cluster_name=cluster_name,
                            remote=remote,
                            args=[
                                'ceph',
                                'mon',
                                'dump',
                                '-f',
                                'json',
                            ],
                            stdout=StringIO(),
                        )
                        j = json.loads(r.stdout.getvalue())
                        if len(j['mons']) == num_mons:
                            break

        # refresh our (final) ceph.conf file
        log.info('Generating final ceph.conf file...')
        r = _shell(
            ctx=ctx,
            cluster_name=cluster_name,
            remote=remote,
            args=[
                'ceph',
                'config',
                'generate-minimal-conf',
            ],
            stdout=StringIO(),
        )
        ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

        yield

    finally:
        pass
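
Both cephadm variants above poll 'ceph mon dump -f json' until the monmap contains the expected number of mons. A minimal sketch of that check against a JSON document; the monmap content below is fabricated:

import json

def monmap_has(mon_dump_json, expected):
    """Return True once the monmap JSON lists the expected number of mons."""
    return len(json.loads(mon_dump_json)['mons']) == expected

dump = json.dumps({'epoch': 3, 'mons': [{'name': 'a'}, {'name': 'b'}]})
print(monmap_has(dump, 2))   # True
print(monmap_has(dump, 3))   # False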
コード例 #48
0
def task(ctx, config):
    """
     pre-validation still pending
    """

    log.info('starting rgw-longrunning')
    log.info('config %s' % config)
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "task set-repo only supports a dictionary for configuration"
    config_file_name = config['test'] + ".yaml"
    log.info('test_version: %s' % config.get('test_version', 'v2'))
    log.info('test: %s' % config['test'])
    branch = config.get('branch', 'master')
    log.info('script: %s' % config.get('script', config['test'] + ".py"))
    test_root_dir = 'rgw-tests'
    test_base_path = os.path.join(test_root_dir, 'ceph-qe-scripts')
    script = os.path.join(test_base_path,
                          DIR[config.get('test_version', 'v2')]['script'],
                          config.get('script', config['test'] + ".py"))
    config_file = os.path.join(test_base_path,
                               DIR[config.get('test_version', 'v2')]['config'],
                               config_file_name)
    log.info('script: %s' % script)
    log.info('config_file: %s' % config_file)
    soot = ['venv', 'rgw-tests', 'io_info.yaml', '*.json', 'Download.*', 'Download', '*.mpFile', 'x*', 'key.*', 'Mp.*',
            '*.key.*']
    cleanup = lambda x: clients[0].run(args=[run.Raw('sudo rm -rf %s' % x)])
    remotes = ctx.cluster.only(teuthology.is_type('client'))
    clients = [
        remote for remote,
                   roles_for_host in remotes.remotes.items()]
    list(map(cleanup, soot))
    clients[0].run(args=['mkdir', test_root_dir])
    log.info('cloning the repo to %s' % clients[0].hostname)
    clients[0].run(
        args=[
            'cd',
            '%s' % test_root_dir,
            run.Raw(';'),
            'git',
            'clone',
            'https://github.com/red-hat-storage/ceph-qe-scripts.git',
            '-b',
            '%s' % branch
        ])
    mapped_sizes = do_auto_calculate_io(clients, config)
    test_config = {'config': config.get('config')}
    test_config['config']['objects_count'] = len(mapped_sizes)
    test_config['config']['mapped_sizes'] = mapped_sizes
    log.info('config: %s' % test_config)
    log.info('creating configuration from data: %s' % test_config)
    local_file = os.path.join('/tmp/',
                              config_file_name + "_" + str(os.getpid()) + pwd.getpwuid(os.getuid()).pw_name)
    with open(local_file, 'w') as outfile:
        outfile.write(yaml.dump(test_config, default_flow_style=False))
    log.info('local_file: %s' % local_file)
    log.info('copying temp yaml to the client node')
    clients[0].put_file(local_file, config_file)
    clients[0].run(args=['ls', '-lt', os.path.join(test_base_path,
                                                   DIR[config.get('test_version', 'v2')]['config'])])
    clients[0].run(args=['cat', config_file])
    # os.remove(local_file)
    clients[0].run(args=['python3', '-m', 'venv', 'venv'])
    clients[0].run(
        args=[
            'source',
            'venv/bin/activate',
            run.Raw(';'),
            run.Raw('pip3 install boto boto3 names PyYaml ConfigParser'),
            run.Raw(';'),
            'deactivate'])
    time.sleep(60)
    log.info('trying to restart rgw service after sleep 60 secs')
    clients[0].run(args=[run.Raw('sudo systemctl restart ceph-radosgw.target')])
    log.info('starting the tests after sleep of 60 secs')
    time.sleep(60)
    clients[0].run(
        args=[run.Raw(
            'sudo venv/bin/python3 %s -c %s ' % (script, config_file))])
    try:
        yield
    finally:
        log.info('Test completed')
        log.info('Cluster size after test completion')
        cluster_size = get_cluster_size_info(clients)
        log.info('available: %s' % cluster_size['AVAIL'])
        log.info("Deleting leftovers")
        list(map(cleanup, soot))
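
The rgw task above merges the auto-calculated object sizes into the test's 'config' section before shipping it to the client as YAML. A short sketch of that merge-and-dump step, using PyYAML as the task itself does; the sizes are made-up values:

import yaml

def build_test_config(base_config, mapped_sizes):
    """Embed object count and size map into the config written for the test."""
    test_config = {'config': dict(base_config or {})}
    test_config['config']['objects_count'] = len(mapped_sizes)
    test_config['config']['mapped_sizes'] = mapped_sizes
    return test_config

cfg = build_test_config({'user_count': 2}, {1: 5242880, 2: 10485760})
print(yaml.dump(cfg, default_flow_style=False))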
コード例 #49
0
ファイル: ceph2.py プロジェクト: xiemylogos/ceph
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    testdir = teuthology.get_testdir(ctx)
    num_mons = 1

    try:
        for remote, roles in ctx.cluster.remotes.items():
            for mon in [
                    r for r in roles
                    if teuthology.is_type('mon', cluster_name)(r)
            ]:
                c_, _, id_ = teuthology.split_role(mon)
                if c_ == cluster_name and id_ == ctx.ceph[
                        cluster_name].first_mon:
                    continue
                log.info('Adding %s on %s' % (mon, remote.shortname))
                num_mons += 1
                _shell(ctx, cluster_name, remote, [
                    'ceph',
                    'orchestrator',
                    'mon',
                    'update',
                    str(num_mons),
                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] +
                    '=' + id_,
                ])
                ctx.daemons.register_daemon(
                    remote,
                    'mon',
                    id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

                with contextutil.safe_while(sleep=1, tries=180) as proceed:
                    while proceed():
                        log.info('Waiting for %d mons in monmap...' %
                                 (num_mons))
                        r = _shell(
                            ctx=ctx,
                            cluster_name=cluster_name,
                            remote=remote,
                            args=[
                                'ceph',
                                'mon',
                                'dump',
                                '-f',
                                'json',
                            ],
                            stdout=StringIO(),
                        )
                        j = json.loads(r.stdout.getvalue())
                        if len(j['mons']) == num_mons:
                            break

        # refresh ceph.conf files for all mons + first mgr
        """
        for remote, roles in ctx.cluster.remotes.items():
            for mon in [r for r in roles
                        if teuthology.is_type('mon', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mon)
                _shell(ctx, cluster_name, remote, [
                    'ceph', 'orchestrator', 'service', 'redeploy',
                    'mon', id_,
                ])
        _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, [
            'ceph', 'orchestrator', 'service', 'redeploy',
            'mgr', ctx.ceph[cluster_name].first_mgr,
        ])
        """

        yield

    finally:
        pass
コード例 #50
0
ファイル: hadoop.py プロジェクト: alsall/teuthology
def binaries(ctx, config):
    """
    Fetch the binaries from the gitbuilder, and spawn the download tasks on
    the remote machines.
    """
    path = config.get('path')

    if path is None:
        # fetch Apache Hadoop from gitbuilder
        log.info(
            'Fetching and unpacking Apache Hadoop binaries from gitbuilder...')
        apache_sha1, apache_hadoop_bindir_url = teuthology.get_ceph_binary_url(
            package='apache-hadoop',
            branch=config.get('apache_branch'),
            tag=config.get('tag'),
            sha1=config.get('sha1'),
            flavor=config.get('flavor'),
            format=config.get('format'),
            dist=config.get('dist'),
            arch=config.get('arch'),
        )
        log.info('apache_hadoop_bindir_url %s' % (apache_hadoop_bindir_url))
        ctx.summary['apache-hadoop-sha1'] = apache_sha1

        # fetch Inktank Hadoop from gitbuilder
        log.info(
            'Fetching and unpacking Inktank Hadoop binaries from gitbuilder...'
        )
        inktank_sha1, inktank_hadoop_bindir_url = \
            teuthology.get_ceph_binary_url(
                package='hadoop',
                branch=config.get('inktank_branch'),
                tag=config.get('tag'),
                sha1=config.get('sha1'),
                flavor=config.get('flavor'),
                format=config.get('format'),
                dist=config.get('dist'),
                arch=config.get('arch'),
                )
        log.info('inktank_hadoop_bindir_url %s' % (inktank_hadoop_bindir_url))
        ctx.summary['inktank-hadoop-sha1'] = inktank_sha1

    else:
        raise Exception(
            "The hadoop task does not support the path argument at present")

    with parallel() as parallel_task:
        hadoop_nodes = ctx.cluster.only(teuthology.is_type('hadoop'))
        # these can happen independently
        for remote in hadoop_nodes.remotes.iterkeys():
            parallel_task.spawn(_node_binaries, ctx, remote,
                                inktank_hadoop_bindir_url,
                                apache_hadoop_bindir_url)

    try:
        yield
    finally:
        log.info('Removing hadoop binaries...')
        run.wait(
            ctx.cluster.run(
                args=[
                    'rm', '-rf', '--', '{tdir}/apache_hadoop'.format(
                        tdir=teuthology.get_testdir(ctx))
                ],
                wait=False,
            ), )
        run.wait(
            ctx.cluster.run(
                args=[
                    'rm', '-rf', '--', '{tdir}/inktank_hadoop'.format(
                        tdir=teuthology.get_testdir(ctx))
                ],
                wait=False,
            ), )
コード例 #51
0
def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    assert osds
    for osd, roles in osds.remotes.items():
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            _push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--no-mon-config --op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with re-built mon db
    # pull from recovered leveldb from client
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    _push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)

    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *',
                  '--cap', 'mgr', 'allow *'])
    mon.run(args=['sudo', '-u', 'ceph',
                  'CEPH_ARGS=--no-mon-config',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--',
                  '--keyring', keyring_path,
                  '--monmap', '/tmp/monmap',
                  ])
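
_rebuild_db repeatedly splits roles such as 'ceph.osd.3' into (cluster, type, id) and derives the mon store path '/var/lib/ceph/mon/<cluster>-<id>'. A rough standalone approximation of those two helpers (teuthology's real split_role is more thorough):

import os

def split_role(role, default_cluster='ceph'):
    """Split 'osd.3' or 'ceph.osd.3' into (cluster, type, id)."""
    parts = role.split('.')
    if len(parts) == 2:
        return default_cluster, parts[0], parts[1]
    return parts[0], parts[1], parts[2]

def mon_store_dir(cluster, mon_id):
    return os.path.join('/var/lib/ceph/mon', '{0}-{1}'.format(cluster, mon_id))

print(split_role('osd.3'))          # ('ceph', 'osd', '3')
print(split_role('ceph.mon.a'))     # ('ceph', 'mon', 'a')
print(mon_store_dir('ceph', 'a'))   # /var/lib/ceph/mon/ceph-a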
コード例 #52
0
def task(ctx, config):
    """
     Run Hadoop S3A tests using Ceph
     usage:
      -tasks:
         ceph-ansible:
         s3a-hadoop:
           maven-version: '3.3.9' (default)
           hadoop-version: '2.7.3'
           bucket-name: 's3atest' (default)
           access-key: 'anykey' (uses a default value)
           secret-key: 'secretkey' ( uses a default value)
    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
    testdir = misc.get_testdir(ctx)
    rgws = ctx.cluster.only(misc.is_type('rgw'))
    # use the first rgw node to test s3a
    rgw_node = rgws.remotes.keys()[0]
    # get versions
    maven_major = config.get('maven-major', 'maven-3')
    maven_version = config.get('maven-version', '3.3.9')
    hadoop_ver = config.get('hadoop-version', '2.7.3')
    bucket_name = config.get('bucket-name', 's3atest')
    access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
    secret_key = config.get('secret-key',
                            'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')

    # set versions for cloning the repo
    apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
        maven_version=maven_version)
    maven_link = 'http://mirror.jax.hugeserver.com/apache/maven/' + \
        '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + apache_maven
    hadoop_git = 'https://github.com/apache/hadoop'
    hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
    install_prereq(rgw_node)
    rgw_node.run(args=[
        'cd', testdir,
        run.Raw('&&'), 'wget', maven_link,
        run.Raw('&&'), 'tar', '-xvf', apache_maven,
        run.Raw('&&'), 'git', 'clone',
        run.Raw(hadoop_git),
        run.Raw('&&'), 'cd', 'hadoop',
        run.Raw('&&'), 'git', 'checkout', '-b',
        run.Raw(hadoop_rel)
    ])
    dnsmasq_name = 's3.ceph.com'
    configure_s3a(rgw_node, dnsmasq_name, access_key, secret_key, bucket_name,
                  testdir)
    setup_dnsmasq(rgw_node, dnsmasq_name)
    fix_rgw_config(rgw_node, dnsmasq_name)
    setup_user_bucket(rgw_node, dnsmasq_name, access_key, secret_key,
                      bucket_name, testdir)
    if hadoop_ver.startswith('2.8'):
        test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale -Dfs.s3a.scale.test.huge.filesize=128M verify'
    else:
        test_options = 'test -Dtest=S3a*,TestS3A*'
    try:
        run_s3atest(rgw_node, maven_version, testdir, test_options)
        yield
    finally:
        log.info("Done s3a testing, Cleaning up")
        for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
            rgw_node.run(args=[
                'rm',
                run.Raw('-rf'),
                run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))
            ])
        # restart networking and let NetworkManager restore the original config
        rgw_node.run(args=['sudo', 'systemctl', 'stop', 'dnsmasq'])
        rgw_node.run(args=['sudo', 'systemctl', 'restart', 'network.service'],
                     check_status=False)
        rgw_node.run(args=['sudo', 'systemctl', 'status', 'network.service'],
                     check_status=False)
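As elsewhere in these tasks, overrides['s3a-hadoop'] is folded into the per-job config via misc.deep_merge, with the override values winning. The stand-in below only mimics the dict case and is not teuthology's implementation; the config values are made up.

def deep_merge_sketch(base, extra):
    # keys in extra win; nested dicts are merged recursively
    for key, value in extra.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            deep_merge_sketch(base[key], value)
        else:
            base[key] = value
    return base

job_config = {'hadoop-version': '2.8.5'}
overrides = {'s3a-hadoop': {'bucket-name': 'ci-bucket'}}
deep_merge_sketch(job_config, overrides.get('s3a-hadoop', {}))
# -> {'hadoop-version': '2.8.5', 'bucket-name': 'ci-bucket'}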
Code example #53
def task(ctx, config):
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    ctx.ceph[cluster_name] = argparse.Namespace()

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(use_cephadm=ctx.cephadm)

    # image
    ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = 'quay.io/ceph-ci/ceph:%s' % branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    roles = [role_list for (remote, role_list) in remotes_and_roles]
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername()
                                  for (remote, role_list) in remotes_and_roles)
    ]

    if config.get('roleless', False):
        # mons will be named after hosts
        roles = []
        first_mon = None
        for remote, _ in remotes_and_roles:
            roles.append(['mon.' + remote.shortname])
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No roles; fabricating mons %s' % roles)

    ctx.ceph[cluster_name].mons = get_mons(
        roles,
        ips,
        cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote, ) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' %
                 (first_mon, bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted(
            [r for r in others if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' %
                               first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr

    with contextutil.nested(
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: ceph_bootstrap(ctx=ctx, config=config),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')
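The container image is picked by a simple precedence: an explicit image from the job config wins, otherwise a quay.io/ceph-ci image tagged with the given sha1, otherwise one tagged with the branch (default master). A condensed, illustrative-only sketch of that precedence:

def choose_image(config):
    # explicit image > sha1-tagged ceph-ci image > branch-tagged image
    image = config.get('image')
    ref = None
    if not image:
        sha1 = config.get('sha1')
        if sha1:
            image, ref = 'quay.io/ceph-ci/ceph:%s' % sha1, sha1
        else:
            branch = config.get('branch', 'master')
            image, ref = 'quay.io/ceph-ci/ceph:%s' % branch, branch
    return image, ref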
Code example #54
    def run_haproxy(self):
        """
        task:
            ceph-ansible:
                haproxy: true
                haproxy_repo: https://github.com/smanjara/ansible-haproxy.git
                haproxy_branch: master
        """
        # Clone the haproxy playbooks from https://github.com/smanjara/ansible-haproxy/
        # and read the haproxy nodes from inven.yml in the ceph-ansible dir.
        # Assumes haproxy roles such as haproxy.0, haproxy.1 and so on.

        installer_node = self.ceph_installer
        haproxy_ansible_repo = self.config['haproxy_repo']
        branch = 'master'
        if self.config.get('haproxy_branch'):
            branch = self.config.get('haproxy_branch')

        installer_node.run(args=[
            'cd',
            run.Raw('~/'),
            run.Raw(';'),
            'git',
            'clone',
            run.Raw('-b %s' % branch),
            run.Raw(haproxy_ansible_repo),
        ],
                           timeout=4200,
                           stdout=StringIO())
        allhosts = self.each_cluster.only(misc.is_type('rgw')).remotes.keys()
        clients = list(set(allhosts))
        ips = []
        for each_client in clients:
            ips.append(socket.gethostbyname(each_client.hostname))

        # substitute the {{ ip_varN }} placeholders in haproxy.yml with the rgw node ips
        ip_vars = {}
        for i in range(len(ips)):
            ip_vars['ip_var' + str(i)] = ips.pop()

        # run haproxy playbook
        args = [
            'ANSIBLE_STDOUT_CALLBACK=debug', 'ansible-playbook', '-vv',
            'haproxy.yml', '-e',
            "'%s'" % json.dumps(ip_vars), '-i', '~/ceph-ansible/inven.yml'
        ]
        log.debug("Running %s", args)
        str_args = ' '.join(args)
        installer_node.run(args=[
            run.Raw('cd ~/ansible-haproxy'),
            run.Raw(';'),
            run.Raw(str_args)
        ])
        # run keepalived playbook
        args = [
            'ANSIBLE_STDOUT_CALLBACK=debug', 'ansible-playbook', '-vv',
            'keepalived.yml', '-e',
            "'%s'" % json.dumps(ip_vars), '-i', '~/ceph-ansible/inven.yml'
        ]
        log.debug("Running %s", args)
        str_args = ' '.join(args)
        installer_node.run(args=[
            run.Raw('cd ~/ansible-haproxy'),
            run.Raw(';'),
            run.Raw(str_args)
        ])
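One subtlety in run_haproxy: ip_vars is filled with ips.pop(), which takes addresses from the end of the list, so ip_var0 ends up holding the last RGW address. If the ordering ever matters, an enumerate()-based mapping keeps it stable; the addresses below are made up for illustration.

rgw_ips = ['10.0.0.11', '10.0.0.12', '10.0.0.13']   # example values only
ip_vars = {'ip_var%d' % i: ip for i, ip in enumerate(rgw_ips)}
# -> {'ip_var0': '10.0.0.11', 'ip_var1': '10.0.0.12', 'ip_var2': '10.0.0.13'}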
Code example #55
File: cephadm.py  Project: LargerPanda/ceph-1
def initialize_config(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(use_cephadm=ctx.cephadm)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = [
        host for (host, port) in (remote.ssh.get_transport().getpeername()
                                  for (remote, role_list) in remotes_and_roles)
    ]

    if config.get('roleless', False):
        # mons will be named after hosts
        first_mon = None
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]

    ctx.ceph[cluster_name].mons = get_mons(
        roles,
        ips,
        cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote, ) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' %
                 (first_mon, bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted(
            [r for r in others if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' %
                               first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr
    yield
Code example #56
File: ceph2.py  Project: xiemylogos/ceph
def ceph_bootstrap(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    mons = ctx.ceph[cluster_name].mons
    first_mon_role = sorted(mons.keys())[0]
    _, _, first_mon = teuthology.split_role(first_mon_role)
    (bootstrap_remote, ) = ctx.cluster.only(first_mon_role).remotes.keys()
    log.info('First mon is mon.%s on %s' %
             (first_mon, bootstrap_remote.shortname))
    ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
    ctx.ceph[cluster_name].first_mon = first_mon

    others = ctx.cluster.remotes[bootstrap_remote]
    log.info('others %s' % others)
    mgrs = sorted(
        [r for r in others if teuthology.is_type('mgr', cluster_name)(r)])
    if not mgrs:
        raise RuntimeError('no mgrs on the same host as first mon %s' %
                           first_mon)
    _, _, first_mgr = teuthology.split_role(mgrs[0])
    log.info('First mgr is %s' % (first_mgr))
    ctx.ceph[cluster_name].first_mgr = first_mgr

    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = StringIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(remote=bootstrap_remote,
                              path='{}/seed.{}.conf'.format(
                                  testdir, cluster_name),
                              data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue())

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.ceph_daemon,
            '--image',
            ctx.ceph[cluster_name].image,
            'bootstrap',
            '--fsid',
            fsid,
            '--mon-id',
            first_mon,
            '--mgr-id',
            first_mgr,
            '--config',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config',
            '{}/{}.conf'.format(testdir, cluster_name),
            '--output-keyring',
            '{}/{}.keyring'.format(testdir, cluster_name),
            '--output-pub-ssh-key',
            '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo',
            'chmod',
            '+r',
            '{}/{}.keyring'.format(testdir, cluster_name),
        ]
        bootstrap_remote.run(args=cmd)

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote,
            'mon',
            first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        ctx.daemons.register_daemon(
            bootstrap_remote,
            'mgr',
            first_mgr,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mgr.' + first_mgr),
            wait=False,
            started=True,
        )

        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.conf'.format(testdir, cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.keyring'.format(testdir, cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(remote=bootstrap_remote,
                                          path='{}/{}.pub'.format(
                                              testdir, cluster_name)).strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo',
            'install',
            '-d',
            '-m',
            '0700',
            '/root/.ssh',
            run.Raw('&&'),
            'echo',
            ssh_pub_key,
            run.Raw('|'),
            'sudo',
            'tee',
            '-a',
            '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo',
            'chmod',
            '0600',
            '/root/.ssh/authorized_keys',
        ])

        # add other hosts
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing conf and keyring to %s' % remote.shortname)
            teuthology.write_file(remote=remote,
                                  path='{}/{}.conf'.format(
                                      testdir, cluster_name),
                                  data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(remote=remote,
                                  path='{}/{}.keyring'.format(
                                      testdir, cluster_name),
                                  data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote,
                   ['ceph', 'orchestrator', 'host', 'add', remote.shortname])

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm',
            '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
            '{}/{}.conf'.format(testdir, cluster_name),
            '{}/{}.keyring'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES):
            cluster, type_, id_ = teuthology.split_role(role)
            ctx.daemons.get_daemon(type_, id_, cluster).stop()
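The bootstrap command adapts to the monitor address format: an addrvec rendered as a bracketed list goes to --mon-addrv, while a bare legacy address goes to --mon-ip. A small, illustrative-only sketch of that branch (the addresses are made up):

def mon_addr_args(mon_addr):
    # bracketed addrvec -> --mon-addrv, plain legacy address -> --mon-ip
    if mon_addr.startswith('['):
        return ['--mon-addrv', mon_addr]
    return ['--mon-ip', mon_addr]

mon_addr_args('[v2:10.0.0.5:3300,v1:10.0.0.5:6789]')  # -> ['--mon-addrv', ...]
mon_addr_args('10.0.0.5:6789')                        # -> ['--mon-ip', ...]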
Code example #57
def task(ctx, config):
    """
    "Thrash" the OSDs by randomly marking them out/down (and then back
    in) until the task is ended. This loops, and every op_delay
    seconds it randomly chooses to mark an OSD out or back in (even
    odds), while keeping at least min_in OSDs in the cluster and at
    least min_out OSDs out of it.

    All commands are run on mon0 and it stops when __exit__ is called.

    The config is optional, and is a dict containing some or all of:

    min_in: (default 3) the minimum number of OSDs to keep in the
       cluster

    min_out: (default 0) the minimum number of OSDs to keep out of the
       cluster

    op_delay: (5) the length of time to sleep between changing an
       OSD's status

    min_dead: (0) minimum number of osds to leave down/dead.

    max_dead: (0) maximum number of osds to leave down/dead before waiting
       for clean.  This should probably be num_replicas - 1.

    clean_interval: (60) the approximate length of time to loop before
       waiting until the cluster goes clean. (In reality this is used
       to probabilistically choose when to wait, and the method used
       makes it closer to -- but not identical to -- the half-life.)

    scrub_interval: (-1) the approximate length of time to loop before
       waiting until a scrub is performed while cleaning. (In reality
       this is used to probabilistically choose when to wait, and it
       only applies to the cases where cleaning is being performed). 
       -1 is used to indicate that no scrubbing will be done.
  
    chance_down: (0.4) the probability that the thrasher will mark an
       OSD down rather than marking it out. (The thrasher will not
       consider that OSD out of the cluster, since presently an OSD
       wrongly marked down will mark itself back up again.) This value
       can be either an integer (eg, 75) or a float probability (eg
       0.75).

    chance_test_min_size: (0) chance to run test_pool_min_size,
       which:
       - kills all but one osd
       - waits
       - kills that osd
       - revives all other osds
       - verifies that the osds fully recover

    timeout: (360) the number of seconds to wait for the cluster
       to become clean after each cluster change. If this doesn't
       happen within the timeout, an exception will be raised.

    revive_timeout: (150) number of seconds to wait for an osd asok to
       appear after attempting to revive the osd

    thrash_primary_affinity: (true) randomly adjust primary-affinity

    chance_pgnum_grow: (0) chance to increase a pool's pg_num
    chance_pgpnum_fix: (0) chance to bring a pool's pgp_num in line with its pg_num
    pool_grow_by: (10) amount to increase pgnum by
    max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd

    pause_short: (3) duration of short pause
    pause_long: (80) duration of long pause
    pause_check_after: (50) assert osd down after this long
    chance_inject_pause_short: (1) chance of injecting short stall
    chance_inject_pause_long: (0) chance of injecting long stall

    clean_wait: (0) duration to wait before resuming thrashing once clean

    sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
                  random live osd

    powercycle: (false) whether to power cycle the node instead
        of just the osd process. Note that this assumes that a single
        osd is the only important process on the node.

    chance_test_backfill_full: (0) chance to simulate full disks stopping
        backfill

    chance_test_map_discontinuity: (0) chance to test map discontinuity
    map_discontinuity_sleep_time: (40) time to wait for map trims

    ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
    chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)

    example:

    tasks:
    - ceph:
    - thrashosds:
        chance_down: 10
        op_delay: 3
        min_in: 1
        timeout: 600
    - interactive:
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'thrashosds task only accepts a dict for configuration'
    # add default value for sighup_delay
    config['sighup_delay'] = config.get('sighup_delay', 0.1)
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('thrashosds', {}))

    if 'powercycle' in config:

        # sync everyone first to avoid collateral damage to / etc.
        log.info('Doing preliminary sync to avoid collateral damage...')
        ctx.cluster.run(args=['sync'])

        if 'ipmi_user' in ctx.teuthology_config:
            for t, key in ctx.config['targets'].iteritems():
                host = t.split('@')[-1]
                shortname = host.split('.')[0]
                from teuthology.orchestra import remote as oremote
                console = oremote.getRemoteConsole(
                    name=host,
                    ipmiuser=ctx.teuthology_config['ipmi_user'],
                    ipmipass=ctx.teuthology_config['ipmi_password'],
                    ipmidomain=ctx.teuthology_config['ipmi_domain'])
                cname = '{host}.{domain}'.format(
                    host=shortname,
                    domain=ctx.teuthology_config['ipmi_domain'])
                log.debug('checking console status of %s' % cname)
                if not console.check_status():
                    log.info('Failed to get console status for '
                             '%s, disabling console...' % cname)
                    console = None
                else:
                    # find the remote for this console and add it
                    remotes = [
                        r for r in ctx.cluster.remotes.keys() if r.name == t
                    ]
                    if len(remotes) != 1:
                        raise Exception('Too many (or too few) remotes '
                                        'found for target {t}'.format(t=t))
                    remotes[0].console = console
                    log.debug('console ready on %s' % cname)

            # check that all osd remotes have a valid console
            osds = ctx.cluster.only(teuthology.is_type('osd'))
            for remote, _ in osds.remotes.iteritems():
                if not remote.console:
                    raise Exception(
                        'IPMI console required for powercycling, '
                        'but not available on osd role: {r}'.format(
                            r=remote.name))

    log.info('Beginning thrashosds...')
    thrash_proc = ceph_manager.Thrasher(ctx.manager,
                                        config,
                                        logger=log.getChild('thrasher'))
    try:
        yield
    finally:
        log.info('joining thrashosds')
        thrash_proc.do_join()
        ctx.manager.wait_for_recovery(config.get('timeout', 360))
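The docstring notes that chance values such as chance_down accept either an integer percentage (e.g. 75) or a float probability (e.g. 0.75). A hedged sketch of that convention follows; normalize_chance is illustrative and not the Thrasher's own code.

def normalize_chance(value):
    # integers are read as percentages, floats as probabilities
    return value / 100.0 if isinstance(value, int) else float(value)

normalize_chance(75)   # -> 0.75
normalize_chance(0.4)  # -> 0.4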