Example 1
def main():
    args = parser.parse_args()
    MAX_SCRUBS_WEEK = args.MAX_SCRUBS
    MAX_SCRUBS_WEEKEND = args.MAX_SCRUBS_WEEKEND
    if MAX_SCRUBS_WEEKEND is None:
        MAX_SCRUBS_WEEKEND = MAX_SCRUBS_WEEK
    SLEEP = args.SLEEP
    CONF = args.CONF
    AGE = args.AGE
    MIN_HOUR = args.START_HOUR
    MAX_HOUR = args.END_HOUR
    GRAPHITE_PREFIX = args.GRAPHITE_PREFIX

    # connect to cluster
    try:
        cluster = rados.Rados(conffile=CONF)
    except TypeError:
        logger.exception("Failed to connect to ceph cluster")
        sys.exit(1)

    try:
        cluster.connect()
    except Exception:
        logger.exception("Failed to connect to ceph cluster")
        sys.exit(1)

    deep_scrubbing = {}
    while True:

        if not in_scrubbing_window(MIN_HOUR, MAX_HOUR):
            logger.warning("Outside of deep scrubbing hours, will not start")
            if SLEEP:
                time.sleep(120)
                continue
            else:
                sys.exit(0)

        now = datetime.datetime.utcnow()
        if now.isoweekday() >= 6:
            MAX_SCRUBS = MAX_SCRUBS_WEEKEND
        else:
            MAX_SCRUBS = MAX_SCRUBS_WEEK

        # pull PG data
        logger.info("Pulling pg info")
        cmd = {'prefix': 'pg dump', 'format': 'json'}
        ret, buf, out = cluster.mon_command(json.dumps(cmd), b'', timeout=5)
        pg_dump = json.loads(buf)
        pg_stats = pg_dump['pg_stats']

        # check if any previous jobs have finished
        existing_scrubbing = [
            pg for pg in pg_stats if pg['pgid'] in deep_scrubbing
        ]
        logger.info("Checking %s PGs if they have finished scrubbing",
                    len(existing_scrubbing))
        for pg in existing_scrubbing:
            if 'scrubbing+deep' not in pg['state']:
                scrub_completed = datetime.datetime.strptime(
                    pg['last_deep_scrub_stamp'][:-7], "%Y-%m-%d %H:%M:%S")

                if scrub_completed > now - datetime.timedelta(days=AGE):
                    duration = (scrub_completed -
                                deep_scrubbing[pg['pgid']]).total_seconds()
                    logger.info(
                        "pg %s appears to have finished a deep scrub, took %s seconds",
                        pg['pgid'], duration)
                    del deep_scrubbing[pg['pgid']]
                    if GRAPHITE_PREFIX:
                        logger.info("trying to send metric to graphite")
                        send_metric(
                            "{}.deep_scrub.duration".format(GRAPHITE_PREFIX),
                            duration)

                else:
                    logger.warning(
                        "pg %s appears to have not finished a deep scrub, but isn't deep scrubbing",
                        pg['pgid'])
            else:
                logger.info("pg %s is still deep scrubbing", pg['pgid'])

        # find out which OSDs are currently scrubbing
        pgs_scrubbing = [pg for pg in pg_stats if 'scrubbing' in pg['state']]
        pgs_scrubbing.sort(key=lambda k: k['pgid'])
        osds_scrubbing = {}
        for pg in pgs_scrubbing:
            for osd in pg['acting']:
                logger.info("pg %s (%s) uses osd.%s", pg['pgid'], pg['state'],
                            osd)
                try:
                    osds_scrubbing[osd] += 1
                except KeyError:
                    osds_scrubbing[osd] = 1

        logger.info("OSDs scrubbing: %s", len(osds_scrubbing))

        # get deep scrub info
        pgs_scrubbing = [
            pg for pg in pg_stats if 'scrubbing+deep' in pg['state']
        ]

        if len(pgs_scrubbing) >= MAX_SCRUBS:
            logger.info(
                "Currently limited to %s active deep scrubs - pending another deep scrub task to finish",
                MAX_SCRUBS)
            if SLEEP:
                time.sleep(30)
                continue
            else:
                sys.exit()

        # Which PGs have not been deep scrubbed the longest?
        pg_stats.sort(key=lambda k: k['last_deep_scrub_stamp'])
        pgs_scrubbing_stale = [
            pg for pg in pg_stats
            if 'scrubbing+deep' not in pg['state'] and datetime.datetime.
            strptime(pg['last_deep_scrub_stamp'][:-7], "%Y-%m-%d %H:%M:%S") <=
            (now - datetime.timedelta(days=AGE))
        ]

        if GRAPHITE_PREFIX:
            send_metric(
                "{}.deep_scrub.pg_deep_scrub_stale".format(GRAPHITE_PREFIX),
                len(pgs_scrubbing_stale))
            send_metric(
                "{}.deep_scrub.pg_deep_scrub_stale_percent".format(
                    GRAPHITE_PREFIX),
                len(pgs_scrubbing_stale) / float(len(pg_stats)) * 100)

        n_to_trigger = max(0, MAX_SCRUBS - len(pgs_scrubbing))
        i = 0
        n_triggered = 0

        if len(deep_scrubbing) >= MAX_SCRUBS:
            logger.warning(
                "Already tracking %s queued deep scrubs, not queuing more: %s",
                len(deep_scrubbing), deep_scrubbing)
            if SLEEP:
                time.sleep(30)
                continue
            else:
                sys.exit()

        logger.info("Triggering %s deep scrubs", n_to_trigger)
        for pg in pgs_scrubbing_stale:
            i += 1
            logger.info("PG %s last deep scrubbed %s", pg['pgid'],
                        pg['last_deep_scrub_stamp'])

            if pg['pgid'] in skip_pgs:
                logger.warning("Skipping PG %s due to configuration",
                               pg['pgid'])
                continue

            deep_scrub_stamp = datetime.datetime.strptime(
                pg['last_deep_scrub_stamp'][:-7], "%Y-%m-%d %H:%M:%S")

            if deep_scrub_stamp > now - datetime.timedelta(days=AGE):
                logger.warning(
                    "No need to deep scrub, oldest PG was deep scrubbed less than %s days ago on %s",
                    AGE, deep_scrub_stamp)
                if SLEEP:
                    time.sleep(300)
                    continue
                else:
                    sys.exit()

            blocked = False
            for osd in pg['acting']:
                if osd in osds_scrubbing.keys():
                    logger.warning(
                        "Deep scrubbing blocked on pg %s due to osd.%s actively scrubbing",
                        pg['pgid'], osd)
                    blocked = True

            if pg['pgid'] in deep_scrubbing:
                logger.warning(
                    "pg %s already queued or started for deep scrub",
                    pg['pgid'])
                blocked = True

            if not blocked and n_to_trigger > 0:
                # queue deep scrub
                output = commands.getoutput("ceph pg deep-scrub %s" %
                                            pg['pgid'])
                deep_scrubbing[pg['pgid']] = datetime.datetime.utcnow()
                logger.info("Queued pg %s to deep scrub: %s", pg['pgid'],
                            output)

                for osd in pg['acting']:
                    if osd in osds_scrubbing.keys():
                        osds_scrubbing[osd] += 1
                    else:
                        osds_scrubbing[osd] = 1

                n_triggered += 1

                if n_triggered == n_to_trigger:
                    break

            # if we weren't triggering any, this lets loop above print what needs to be scrubbed without scheduling
            # but only for first/oldest 10 PGs
            if n_to_trigger < 1 and i == 10:
                break

        if SLEEP:
            logger.info("Forcing sleep of %s seconds...", SLEEP)
            time.sleep(SLEEP)
        else:
            break

    # Disconnect
    cluster.shutdown()
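The function above references several module-level names that the excerpt does not show (`parser`, `logger`, `skip_pgs`, `in_scrubbing_window`, `send_metric`). A minimal sketch of what they might look like, offered only as a reading aid; the option names mirror Example 23, and the defaults and the Graphite host/port are placeholder assumptions, not the author's values:

# Sketch of assumed module-level helpers for Example 1 (illustrative only).
import argparse
import datetime
import logging
import socket
import time

logger = logging.getLogger(__name__)
skip_pgs = []  # PG ids that must never be scheduled, e.g. ["1.2f"]

parser = argparse.ArgumentParser(description='Deep scrub the stalest PGs')
parser.add_argument('--max-scrubs', dest='MAX_SCRUBS', type=int, default=1)
parser.add_argument('--max-scrubs-weekend', dest='MAX_SCRUBS_WEEKEND', type=int, default=None)
parser.add_argument('--sleep', dest='SLEEP', type=int, default=0)
parser.add_argument('--conf', dest='CONF', type=str, default='/etc/ceph/ceph.conf')
parser.add_argument('--age', dest='AGE', type=int, default=14)
parser.add_argument('--start-hour', dest='START_HOUR', type=int, default=0)
parser.add_argument('--end-hour', dest='END_HOUR', type=int, default=24)
parser.add_argument('--graphite-prefix', dest='GRAPHITE_PREFIX', type=str, default=None)


def in_scrubbing_window(min_hour, max_hour):
    # True while the current UTC hour is inside the allowed window.
    return min_hour <= datetime.datetime.utcnow().hour < max_hour


def send_metric(path, value):
    # Push one plaintext metric to a carbon endpoint (host/port assumed).
    sock = socket.create_connection(("localhost", 2003), timeout=2)
    sock.sendall("{} {} {}\n".format(path, value, int(time.time())).encode())
    sock.close()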
Example 2
def get_cluster_object(cluster_name, sync_type, since):
    # TODO: for the synced objects that support it, support
    # fetching older-than-present versions to allow the master
    # to backfill its history.

    import rados
    from ceph_argparse import json_command

    # Check you're asking me for something I know how to give you
    assert sync_type in SYNC_TYPES

    # Open a RADOS session
    cluster_handle = rados.Rados(name=RADOS_NAME,
                                 clustername=cluster_name,
                                 conffile='')
    cluster_handle.connect()

    ret, outbuf, outs = json_command(cluster_handle,
                                     prefix='status',
                                     argdict={'format': 'json'},
                                     timeout=RADOS_TIMEOUT)
    status = json.loads(outbuf)
    fsid = status['fsid']

    if sync_type == 'config':
        # Special case for config, get this via admin socket instead of librados
        raw = _get_config(cluster_name)
        version = md5(raw)
        data = json.loads(raw)
    else:
        command, kwargs, version_fn = {
            'mon_status': ('mon_status', {}, lambda d, r: d['election_epoch']),
            'mon_map': ('mon dump', {}, lambda d, r: d['epoch']),
            'osd_map': ('osd dump', {}, lambda d, r: d['epoch']),
            'mds_map': ('mds dump', {}, lambda d, r: d['epoch']),
            'pg_summary': ('pg dump', {
                'dumpcontents': ['pgs_brief']
            }, lambda d, r: md5(msgpack.packb(d))),
            'health': ('health', {
                'detail': ''
            }, lambda d, r: md5(r))
        }[sync_type]
        kwargs['format'] = 'json'
        ret, raw, outs = json_command(cluster_handle,
                                      prefix=command,
                                      argdict=kwargs,
                                      timeout=RADOS_TIMEOUT)
        assert ret == 0

        if sync_type == 'pg_summary':
            data = pg_summary(json.loads(raw))
            version = version_fn(data, raw)
        else:
            data = json.loads(raw)
            version = version_fn(data, raw)

        # Internally, the OSDMap includes the CRUSH map, and the 'osd tree' output
        # is generated from the OSD map.  We synthesize a 'full' OSD map dump to
        # send back to the calamari server.
        if sync_type == 'osd_map':
            ret, raw, outs = json_command(cluster_handle,
                                          prefix="osd tree",
                                          argdict={
                                              'format': 'json',
                                              'epoch': version
                                          },
                                          timeout=RADOS_TIMEOUT)
            assert ret == 0
            data['tree'] = json.loads(raw)
            # FIXME: crush dump does not support an epoch argument, so this is potentially
            # from a higher-versioned OSD map than the one we've just read
            ret, raw, outs = json_command(cluster_handle,
                                          prefix="osd crush dump",
                                          argdict=kwargs,
                                          timeout=RADOS_TIMEOUT)
            assert ret == 0
            data['crush'] = json.loads(raw)

            ret, raw, outs = json_command(cluster_handle,
                                          prefix="osd getcrushmap",
                                          argdict={'epoch': version},
                                          timeout=RADOS_TIMEOUT)
            assert ret == 0

            ret, stdout, outs = transform_crushmap(raw, 'get')
            assert ret == 0
            data['crush_map_text'] = stdout

    return {'type': sync_type, 'fsid': fsid, 'version': version, 'data': data}
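get_cluster_object() also depends on module-level names that are not part of the excerpt (RADOS_NAME, RADOS_TIMEOUT, SYNC_TYPES, the md5 helper, plus pg_summary and transform_crushmap). A rough sketch of the simpler ones, offered as assumptions rather than the project's actual definitions:

# Assumed module-level constants/helpers for Example 2 (sketch only).
import hashlib

RADOS_NAME = 'client.admin'   # client name used for the RADOS session (assumed)
RADOS_TIMEOUT = 20            # seconds before json_command gives up (assumed)

SYNC_TYPES = ['mon_status', 'mon_map', 'osd_map',
              'mds_map', 'pg_summary', 'config', 'health']


def md5(raw):
    # Hex digest used as a cheap "version" for data that has no epoch.
    if isinstance(raw, str):
        raw = raw.encode('utf-8')
    return hashlib.md5(raw).hexdigest()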
Example 3
    def _get_ceph_info(self,
                       timeout,
                       user=None,
                       name=None,
                       clustername=None,
                       conf_defaults=None,
                       conffile=None,
                       conf=None,
                       flags=0):
        try:
            LOG.info("connecting to Ceph instance using %s" %
                     (dict(rados_id=user,
                           name=name,
                           clustername=clustername,
                           conf_defaults=conf_defaults,
                           conffile=conffile,
                           conf=conf,
                           flags=flags)))
            client = rados.Rados(rados_id=user,
                                 name=name,
                                 clustername=clustername,
                                 conf_defaults=conf_defaults,
                                 conffile=conffile,
                                 conf=conf,
                                 flags=flags)
            client.connect(timeout=timeout)
        except Exception as e:
            LOG.info("exception %s while connecting to Ceph with %s" %
                     (e,
                      dict(rados_id=user,
                           name=name,
                           clustername=clustername,
                           conf_defaults=conf_defaults,
                           conffile=conffile,
                           conf=conf,
                           flags=flags)))
            raise e

        LOG.info("connected to Ceph instance using %s" %
                 (dict(rados_id=user,
                       name=name,
                       clustername=clustername,
                       conf_defaults=conf_defaults,
                       conffile=conffile,
                       conf=conf,
                       flags=flags)))
        try:
            mon_list = self._get_mon_list(client)
            fsid = client.get_fsid()
            version = str(client.version())
            pool_list = self._get_pool_capabilities(client)
            cluster_stats = client.get_cluster_stats()

            config_specs = dict()
            if conf_defaults:
                config_specs.update(conf_defaults)
            if conf:
                config_specs.update(conf)
            config_specs.update(
                dict(fsid=fsid,
                     version=version,
                     user=user,
                     name=name,
                     conffile=conffile,
                     monitors=mon_list['monmap']['mons']))

            capability_specs = dict(
                capacity_total_kb=cluster_stats['kb'],
                capacity_avail_kb=cluster_stats['kb_avail'],
                capacity_used_kb=cluster_stats['kb_used'],
                data_type='unified (object, block, file)',
                data_efficiency='thin provision',
                data_services=
                'striping,in-memory caching,copy-on-write cloning,snapshots,incremental backups',
                vendor_services='caching tier',
                performance_IOPS='')

            return dict(name=clustername,
                        driver=CEPH_DRIVER,
                        config_specs=config_specs,
                        capability_specs=capability_specs,
                        tiers=pool_list)
        except Exception as e:
            LOG.info("exception %s while connecting to Ceph with %s" %
                     (e,
                      dict(rados_id=user,
                           name=name,
                           clustername=clustername,
                           conf_defaults=conf_defaults,
                           conffile=conffile,
                           conf=conf,
                           flags=flags)))
            raise e

        return list()
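_get_mon_list() is not shown in the excerpt; given that its result is read as mon_list['monmap']['mons'], one plausible sketch (an assumption, not the driver's code, and assuming json is imported at module level) is a thin wrapper around mon_command:

    # Hypothetical helper: fetch mon status via the existing client handle.
    def _get_mon_list(self, client):
        cmd = json.dumps({'prefix': 'mon_status', 'format': 'json'})
        ret, outbuf, outs = client.mon_command(cmd, b'')
        return json.loads(outbuf)  # contains 'monmap' -> 'mons'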
Example 4
File: rbd.py Project: afliu/glance
    def add(self, image_id, image_file, image_size):
        """
        Stores an image file with supplied identifier to the backend
        storage system and returns a tuple containing information
        about the stored image.

        :param image_id: The opaque image identifier
        :param image_file: The image data to write, as a file-like object
        :param image_size: The size of the image data to write, in bytes

        :retval tuple of URL in backing store, bytes written, checksum
                and a dictionary with storage system specific information
        :raises `glance.common.exception.Duplicate` if the image already
                existed
        """
        checksum = hashlib.md5()
        image_name = str(image_id)
        with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn:
            fsid = None
            if hasattr(conn, 'get_fsid'):
                fsid = conn.get_fsid()
            with conn.open_ioctx(self.pool) as ioctx:
                order = int(math.log(self.chunk_size, 2))
                LOG.debug('creating image %s with order %d and size %d',
                          image_name, order, image_size)
                if image_size == 0:
                    LOG.warning(
                        _("since image size is zero we will be doing "
                          "resize-before-write for each chunk which "
                          "will be considerably slower than normal"))

                try:
                    loc = self._create_image(fsid, ioctx, image_name,
                                             image_size, order)
                except rbd.ImageExists:
                    raise exception.Duplicate(
                        _('RBD image %s already exists') % image_id)
                try:
                    with rbd.Image(ioctx, image_name) as image:
                        bytes_written = 0
                        offset = 0
                        chunks = utils.chunkreadable(image_file,
                                                     self.chunk_size)
                        for chunk in chunks:
                            # If the image size provided is zero we need to do
                            # a resize for the amount we are writing. This will
                            # be slower so setting a higher chunk size may
                            # speed things up a bit.
                            if image_size == 0:
                                chunk_length = len(chunk)
                                length = offset + chunk_length
                                bytes_written += chunk_length
                                LOG.debug(
                                    _("resizing image to %s KiB") %
                                    (length / units.Ki))
                                image.resize(length)
                            LOG.debug(
                                _("writing chunk at offset %s") % (offset))
                            offset += image.write(chunk, offset)
                            checksum.update(chunk)
                        if loc.snapshot:
                            image.create_snap(loc.snapshot)
                            image.protect_snap(loc.snapshot)
                except Exception as exc:
                    # Delete image if one was created
                    try:
                        self._delete_image(loc.image, loc.snapshot)
                    except exception.NotFound:
                        pass

                    raise exc

        # Make sure we send back the image size whether provided or inferred.
        if image_size == 0:
            image_size = bytes_written

        return (loc.get_uri(), image_size, checksum.hexdigest(), {})
Example 5
def rados_functional_test(conf="",
                          key="",
                          poolName="",
                          objectName="",
                          localName="",
                          keepFiles=False,
                          objectSize=1024):
    #NOTE rados functional test does not issue a copy command because there does not appear to be one

    if "" in [conf, key, poolName, objectName, localName]:
        print('rados_functional_test: argument missing')
        return None

    radosMetrics = copy.deepcopy(metrics)
    radosMetrics['protocol'] = 'rados'

    timeEpoch = time.time()
    timeConducted = time.asctime(time.gmtime(timeEpoch))

    localNamePre = localName
    localNamePost = localName + ".post"

    if not os.path.exists(localNamePre):
        gen_data(name=localNamePre, sizeBytes=objectSize)
    else:
        print("Using existing file {} for test object".format(localNamePre))

    cluster = rados.Rados(conffile=conf)
    #cluster = rados.Rados(conffile, conf = dict (keyring=keyringfile))

    cluster.connect()

    if not cluster.pool_exists(poolName):
        print("bad pool name")
        sys.exit(0)

    try:
        #open file like object, write to, read from, delete
        ioctx = cluster.open_ioctx(poolName)
        radosMetrics['open_success'] = True
    except Exception:
        radosMetrics['open_success'] = False

    #time write action
    tic, toc, tac, toe = [], [], [], []
    with open(localNamePre, 'rb') as localDataFile:
        localData = localDataFile.read()

        try:
            tic.append(time.time())
            ioctx.write_full(objectName, localData)
            radosMetrics['write_success'] = True
        except Exception:
            radosMetrics['write_success'] = False

        toc.append(time.time())
        """ while True:
            chunk=data.read(chunkSize)
            if not chunk:
                print("break")
                break
            print("chunk")
            ioctx.aio_append(objectName,chunk)"""

    #check that can read the object
    try:
        object_size, objectTime = ioctx.stat(objectName)
        radosMetrics["stat_success"] = True
    except Exception:
        radosMetrics["stat_success"] = False

    if os.path.exists(localNamePost):
        os.remove(localNamePost)

    with open(localNamePost, 'wb') as localDataFile:

        try:
            tac.append(time.time())
            localData = ioctx.read(objectName, length=object_size)
            radosMetrics["read_success"] = True
        except Exception:
            radosMetrics["read_success"] = False
        toe.append(time.time())

        localDataFile.write(localData)

    if filecmp.cmp(localNamePre, localNamePost):
        print("RADOS SUCCESS: read and write file matching")
    else:
        print("RADOS FAIL: read and write file not matching")

    try:
        ioctx.remove_object(objectName)
        radosMetrics["remove_success"] = True
    except Exception:
        radosMetrics["remove_success"] = False
    ioctx.close()

    #delete local files
    if not keepFiles:
        os.remove(localNamePre)
        os.remove(localNamePost)

    # assign metrics
    radosMetrics["conducted"] = timeConducted
    radosMetrics["conducted_epoch"] = timeEpoch
    radosMetrics["time_write"] = toc[0] - tic[0]
    radosMetrics["time_read"] = toe[0] - tac[0]
    radosMetrics["object_size"] = object_size
    radosMetrics["pool_name"] = poolName

    return radosMetrics
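The test copies a global metrics template and calls gen_data(), neither of which appears in the excerpt. A plausible sketch of both, with field names inferred from the assignments above and the file generation purely illustrative:

# Assumed metrics template and data generator for Example 5 (sketch only).
import os

metrics = {
    'protocol': None, 'conducted': None, 'conducted_epoch': None,
    'open_success': None, 'write_success': None, 'stat_success': None,
    'read_success': None, 'remove_success': None,
    'time_write': None, 'time_read': None,
    'object_size': None, 'pool_name': None,
}


def gen_data(name, sizeBytes):
    # Write sizeBytes of random data to the local file used as the test object.
    with open(name, 'wb') as f:
        f.write(os.urandom(sizeBytes))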
Example 6
    def __init__(self, pool_name):
        cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
        cluster.connect()
        if not cluster.pool_exists(pool_name):
            raise RuntimeError('No data pool exists')
        self.ioctx = cluster.open_ioctx(pool_name)
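The class name is not included in the excerpt; a hypothetical usage sketch (calling it PoolStore here purely for illustration) would be:

# Hypothetical usage of the wrapper above.
store = PoolStore('rbd')  # class name assumed, not given in the snippet
store.ioctx.write_full('greeting', b'hello from librados')
print(store.ioctx.read('greeting'))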
Example 7
def define_luns(gateway):
    """
    define the disks in the config to LIO
    :param gateway: (object) gateway object - used for mapping
    :return: None
    """

    local_gw = this_host()

    ipv4_list = []
    for iface in netifaces.interfaces():
        dev_info = netifaces.ifaddresses(iface).get(netifaces.AF_INET, [])
        ipv4_list += [dev['addr'] for dev in dev_info]

    # sort the disks dict keys, so the disks are registered in a specific
    # sequence
    disks = config.config['disks']
    srtd_disks = sorted(disks)
    pools = {disks[disk_key]['pool'] for disk_key in srtd_disks}

    if pools:
        with rados.Rados(conffile=settings.config.cephconf) as cluster:

            for pool in pools:

                logger.debug("Processing rbd's in '{}' pool".format(pool))

                with cluster.open_ioctx(pool) as ioctx:

                    pool_disks = [
                        disk_key for disk_key in srtd_disks
                        if disk_key.startswith(pool)
                    ]
                    for disk_key in pool_disks:

                        pool, image_name = disk_key.split('.')

                        try:
                            with rbd.Image(ioctx, image_name) as rbd_image:
                                image_bytes = rbd_image.size()
                                image_size_h = human_size(image_bytes)

                                rbd_lock_cleanup(ipv4_list, rbd_image)

                                lun = LUN(logger, pool, image_name,
                                          image_size_h, local_gw)
                                if lun.error:
                                    halt("Error defining rbd image "
                                         "{}".format(disk_key))

                                lun.allocate()
                                if lun.error:
                                    halt("Error unable to register {} with "
                                         "LIO - {}".format(
                                             disk_key, lun.error_msg))

                        except rbd.ImageNotFound:
                            halt("Disk '{}' defined to the config, but image "
                                 "'{}' can not be found in "
                                 "'{}' pool".format(disk_key, image_name,
                                                    pool))

        # Gateway Mapping : Map the LUNs registered to all TPGs within the
        # LIO target
        gateway.manage('map')
        if gateway.error:
            halt("Error mapping the LUNs to the tpg's within the iscsi Target")

    else:
        logger.info("No LUNs to export")
Example 8
    expected_bt = [(c, di)] + expected_bt
    di = mkdir(ceph, d)
    c, f = get_name(d, i, 2)
    fi = create(ceph, f)
    expected_bt = [(c, di)] + expected_bt
    return fi, expected_bt

test = -1
if len(sys.argv) > 1:
    test = int(sys.argv[1])

conf = ''
if len(sys.argv) > 2:
    conf = sys.argv[2]

radosobj = rados.Rados(conffile=conf)
radosobj.connect()
ioctx = radosobj.open_ioctx('data')

ceph = cephfs.LibCephFS(conffile=conf)
ceph.mount()

rooti = ceph.stat('/')['st_ino']

test = -1
if len(sys.argv) > 1:
    test = int(sys.argv[1])

conf = '/etc/ceph/ceph.conf'
if len(sys.argv) > 2:
    conf = sys.argv[2]
Example 9
  if omap_key_count:
    usage('only define omap-key-count for an omap test')
  if omap_value_size:
    usage('only define omap-value-size for an omap test')
max_qdepth_seen = 0
if debug: print('check_every %d time units' % check_every)
response_times = []
sampled_rsp_times = [ 0.01 for k in range (0, 3) ]
per_thread_obj_name = '%s-%s' % (omap_obj_name, thread_id)

# if you add this to ceph.conf file, 
# then you don't need to specify keyring in Rados constructor
#   keyring = /root/ben/ceph.client.admin.keyring
# alternatively don't use cephx

with rados.Rados(conffile=ceph_conf_file, conf=dict(keyring=keyring_path)) as cluster:
    #print(cluster.get_fsid())
    pools = cluster.list_pools()
    if mypool not in pools:
        cluster.create_pool(mypool)  # FIXME: race condition if multiple threads
        print('created pool ' + mypool)
    ioctx = cluster.open_ioctx(mypool)

    # wait until all threads are ready to run

    await_starting_gun()

    # do the workload

    start_time = time.time()
    elapsed_time = -1.0
Example 10
VM_ID_LIST = []
VM_NAME = []
VM = {}
PM_list = []
pm_chosen = ""
pm_len = 0
mark = 0


POOL_NAME = 'try-pool'
CONF_FILE = '/etc/ceph/ceph.conf'
BLOCK_CONFIG_XML = 'attach.xml'
HOST_NAME = 'vishrut-Vostro-1015'

########################################## Connection ############################################################
radosConnection = rados.Rados(conffile=CONF_FILE)
radosConnection.connect()
if POOL_NAME not in radosConnection.list_pools():
    radosConnection.create_pool(POOL_NAME)
ioctx = radosConnection.open_ioctx(POOL_NAME)

rbdInstance = rbd.RBD()

###################################################################################################################
def getHostName():
    global HOST_NAME
    monProc = subprocess.Popen("ceph mon_status", shell=True, bufsize=0, stdout=subprocess.PIPE, universal_newlines=True)
    monDict = eval(monProc.stdout.read())
    HOST_NAME = monDict['monmap']['mons'][0]['name']
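getHostName() above shells out to `ceph mon_status` and eval()s the result; a safer sketch that asks the monitor through the existing radosConnection handle instead (assuming `import json` at the top of the script and a release where the mon_status command is available):

def get_host_name():
    # Hypothetical alternative: query mon_status via librados and parse JSON.
    cmd = json.dumps({'prefix': 'mon_status', 'format': 'json'})
    ret, outbuf, outs = radosConnection.mon_command(cmd, b'', timeout=5)
    return json.loads(outbuf)['monmap']['mons'][0]['name']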
Example 11
def ceph_command(command):
    cluster = rados.Rados(conffile=CEPH_CONF,
                          conf=dict(keyring=CEPH_KEYRING),
                          name=CEPH_USER)
    run_mon_command = True
    try:
        cluster.connect()
    except Exception:
        print "Something prevented the connection to the Ceph cluster. Check your CEPH_USER and CEPH_KEYRING settings."
        exit()

    if command == "blocked requests":
        run_mon_command = False
        output = subprocess.check_output([
            './scripts/blocked_requests.sh', CEPH_CONF, CEPH_USER, CEPH_KEYRING
        ])
    elif command == "down osds" or command == "down osd":
        cmd = {"prefix": "osd tree", "format": "json"}
    elif command == "io":
        run_mon_command = False
        output = subprocess.check_output(
            ['./scripts/io.sh', CEPH_CONF, CEPH_USER, CEPH_KEYRING])
    elif command.startswith("pool io"):
        opt_pool = command.split("pool io")[1].strip().lower()
        run_mon_command = False
        output = subprocess.check_output([
            './scripts/pool_io.sh', CEPH_CONF, CEPH_USER, CEPH_KEYRING,
            opt_pool
        ])
    else:
        cmd = {"prefix": command, "format": "plain"}
    if run_mon_command:
        try:
            ret, output, errs = cluster.mon_command(json.dumps(cmd),
                                                    b'',
                                                    timeout=5)
        except Exception:
            return "Something went wrong while executing " + command + " on the Ceph cluster.", None
    cluster.shutdown()

    if command == "down osds" or command == "down osd":
        output = json.loads(output)
        lastroot = None
        lasthost = None
        msg = ""
        for item in output['nodes']:
            if item['type'] == 'root':
                root = item['name']
            elif item['type'] == 'host':
                host = item['name']
            elif item['type'] == 'osd' and item['status'] == 'down':
                osd = item['name']
                if not root == lastroot:
                    msg = msg + "\n" + root + "\n    " + host + "\n        " + osd
                elif not host == lasthost:
                    msg = msg + "\n    " + host + "\n        " + osd
                else:
                    msg = msg + ", " + osd
                lastroot = root
                lasthost = host
        output = msg.strip()
        if output == "":
            output = "All OSDs are up."

    output = output.strip()
    if output and len(output.split('\n')) < TOO_LONG:
        return output, None
    elif output and len(output.split('\n')) >= TOO_LONG:
        return TOO_LONG_MSG, output
    else:
        return "Something went wrong while executing '" + command + "' on the Ceph cluster.", None
Example 12
    def __init__(self, pool, client_name, namespace=None):
        self.pool = pool
        self.namespace = namespace
        self.rados = rados.Rados(conffile="/etc/ceph/ceph.conf",
                                 name=client_name)
        self.rados.connect()
Example 13
def main(argv):
    global warn

    mutex = threading.Lock()

    try:
        cluster = rados.Rados(conffile='')
    except TypeError as e:
        print 'Argument validation error: ', e
        raise e

    print "Created cluster handle."

    try:
        cluster.connect()
    except Exception as e:
        print "connection error: ", e
        raise e
    finally:
        print "Connected to the cluster."

    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        os.remove(socket_path)
    except:
        pass

    #s.bind( socket_path )
    try:
        s.bind(socket_path)
    except:
        #os._exit(0)
        pass

    #uid = pwd.getpwnam("zabbix")
    uid = pwd.getpwnam(argv[1])
    os.chown(socket_path, uid.pw_uid, uid.pw_gid)

    s.listen(1)

    def ceph_rbd_du():
        global rbd_du_out

        while 1:
            starttime = datetime.datetime.now()
            outbuf = ""
            target = ('mon', '')
            timeout = 0
            verbose = False
            inbuf = ''
            cmddict = {}
            cmddict.update({'prefix': u'osd lspools', 'format': 'json'})
            ret, outbuf, outs = send_command(cluster, target, [json.dumps(cmddict)], inbuf, timeout, verbose)
            lspools = json.loads(outbuf)
            poollist = []
            for item in lspools:
                d = {}
                #print item["poolname"]
                cmdstr = "rbd -p " + item["poolname"] + " du"
                #print cmdstr
                images = []
                status, outbuf = commands.getstatusoutput(cmdstr)
                outarray = outbuf.split('\n')
                del outarray[0]
                del outarray[len(outarray) - 1]
                for index, value in enumerate(outarray):
                    a = value.split(' ')
                    tmparray = []
                    for index, tmpstr in enumerate(a):
                        if len(tmpstr) >= 1:
                            tmparray.append(tmpstr)
                    image = {}
                    image["used"] = tmparray[2]
                    image["provisioned"] = tmparray[1]
                    image["name"] = tmparray[0]
                    images.append(image)

                d["images"] = images
                d["poolname"] = item["poolname"]
                poollist.append(d)
            mutex.acquire()
            rbd_du_out = json.dumps(poollist)
            mutex.release()
            #print rbd_du_out
            endtime = datetime.datetime.now()
            #print (endtime - starttime).seconds
            if (endtime - starttime).seconds < 300:
                time.sleep(300 - (endtime - starttime).seconds)

    t = threading.Thread(target=ceph_rbd_du)
    t.setDaemon(True)
    t.start()

    def watch_cb(arg, line, who, stamp_sec, stamp_nsec, seq, level, msg):
        #print(line)
        #sys.stdout.flush()
        find_warn(line)
        mutex.acquire()
        cluster_log_list.insert(0, line)
        mutex.release()

    # this instance keeps the watch connection alive, but is
    # otherwise unused
    level = "info"
    run_in_thread(cluster.monitor_log, level, watch_cb, 0)

    warn = "0"
    while 1:

        conn, addr = s.accept()

        cmd = conn.recv(1024)

        if not cmd: break

        print cmd

        outbuf = ""
        target = ('mon', '')
        timeout = 0
        verbose = False
        inbuf = ''
        cmddict = {}

        if cmd == "rbd_du":
            print rbd_du_out
            mutex.acquire()
            conn.send(rbd_du_out)
            mutex.release()
            conn.close()
            continue

        elif cmd == "log":
            outbuf = ''

            mutex.acquire()
            for log_str in cluster_log_list:
                outbuf += log_str + "\n"
            del cluster_log_list[:]
            mutex.release()

            conn.send(outbuf)
            conn.close()
            continue

        elif cmd == "warn":
            conn.send(str(warn))
            conn.close()
            continue

        elif cmd == "status":
            cmddict.update({'prefix': u'status', 'format': 'json'})
        elif cmd == "osd_pool_stats":
            cmddict.update({'prefix': u'osd pool stats', 'format': 'json'})
        elif cmd == "osd_tree":
            cmddict.update({'prefix': u'osd tree', 'format': 'json'})
        elif cmd == "osd_df":
            cmddict.update({'prefix': u'osd df', 'format': 'json'})
        elif cmd == "pg_stat":
            cmddict.update({'prefix': u'pg stat', 'format': 'json'})
        elif cmd == "osd_perf":
            cmddict.update({'prefix': u'osd perf', 'format': 'json'})
        elif cmd == "df":
            cmddict.update({'prefix': u'df', 'format': 'json'})

        ret, outbuf, outs = send_command(cluster, target, [json.dumps(cmddict)], inbuf, timeout, verbose)
        find_warn(outbuf)
        conn.send(outbuf)
        conn.close()
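find_warn() and cluster_log_list are referenced but never defined in the excerpt; a guess at their shape, based only on how `warn` and the log list are used above:

# Assumed globals/helpers for Example 13 (sketch only).
cluster_log_list = []

def find_warn(text):
    global warn
    if "HEALTH_ERR" in text or "HEALTH_WARN" in text:
        warn = "1"
    elif "HEALTH_OK" in text:
        warn = "0"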
Example 14
    def _cluster_connect(self) -> None:
        logger.debug("Starting cluster connection thread")
        logged_missing_config_file: bool = False
        while self._run:
            try:
                if not self._cluster:
                    try:
                        # uses /etc/ceph/ceph.client.admin.keyring
                        # really should do separate keys per node so they can be
                        # evicted if necessary when nodes are decommissioned
                        self._cluster = rados.Rados(
                            conffile="/etc/ceph/ceph.conf")
                        logger.info("Got cluster handle")
                    except rados.ObjectNotFound as e:
                        if not logged_missing_config_file:
                            logger.info(
                                f"Can't get cluster handle: '{e}' - will keep retrying"
                            )
                            logged_missing_config_file = True
                if self._cluster and self._cluster.state != "connected":
                    # this can throw (auth failed, etc.)
                    logger.info("Connecting to cluster")
                    self._cluster.connect()
                    logger.info("Cluster connected")
                    has_aquarium_pool = "aquarium" in self._cluster.list_pools(
                    )
                    if not has_aquarium_pool:
                        logger.info("Creating aquarium pool")
                        # TODO: consider setting pg_num 1 as with device_health_metrics pool
                        self._cluster.create_pool("aquarium")
                    self._ioctx = self._cluster.open_ioctx("aquarium")
                    self._ioctx.application_enable("aquarium")
                    # This actually seems to be safe (doesn't trash existing omap
                    # data if present, which is neat)
                    self._ioctx.write_full(
                        "kvstore",
                        "# aquarium kv store is in this object's omap\n".
                        encode("utf-8"),
                    )
                    # At this point, if it's a new pool, new object, etc.
                    # we need to push everything from our local cache to
                    # the omap on our kvstore, to populate it with whatever
                    # may have been set pre-bootstrap.
                    keys = self._db.keys()
                    values = list(self._db[k] for k in keys)
                    if keys and not has_aquarium_pool:
                        try:
                            with rados.WriteOpCtx() as op:
                                # This is a neat trick to make sure we've got version 1
                                # of the kvstore object, which will only be the case with
                                # a newly created object in a new pool.  If the object
                                # somehow already exists with a greater version, an
                                # exception will be raised with errno set to ERANGE when
                                # we try to perform the write op.  I'm having an extremely
                                # hard time seeing how this would be hit in normal operation
                                # (it'd have to be a very bizarre race or bug somewhere),
                                # but since we can handle it, let's do so.
                                op.assert_version(1)
                                self._ioctx.set_omap(op, keys,
                                                     values)  # type: ignore
                                self._ioctx.operate_write_op(op, "kvstore")
                                logger.info(
                                    f"Pushed {keys} to kvstore in newly created aquarium pool"
                                )
                        except rados.OSError as e:
                            if e.errno == errno.ERANGE:
                                logger.warning(
                                    f"kvstore object already exists in aquarium pool, not pushing local cache"
                                )
                            else:
                                raise
                    # Arguably we really only need the config watch if any watches are
                    # requested on specific keys; having one here all the time is not
                    # strictly necessary, but makes the implementation simpler.
                    # TODO: need timeouts, error handlers etc on watch
                    self._config_watch = self._ioctx.watch(
                        "kvstore", self._config_notify)
                    logger.debug(
                        f"config watch id is {self._config_watch.get_id()}")
                    # will raise:
                    # rados.ObjectNotFound: [errno 2] RADOS object not found (watch error)
            except Exception as e:
                # e.g. RADOS state (You cannot perform that operation on a Rados object in state configuring.)
                logger.exception(str(e))

            # TODO: Should we sleep for longer?  A minute instead of 10 seconds?  This is pretty arbitrary...
            logger.debug("Cluster connection thread sleeping for 10 seconds")
            self._event.wait(10)
            self._event.clear()
            # How on earth do we detect that the cluster has gone away
            # and trigger a reconnect?
            # What happens if we try a write op on a down cluster?
            # might want watch checks?

        logger.debug("Shutting down cluster connection")
        if self._config_watch:
            self._config_watch.close()
            # Need to set this to None, so it's deallocated before the
            # cluster is deallocated/shutdown, or we get:
            # Traceback (most recent call last):
            #   File "rados.pyx", line 477, in rados.Rados.require_state
            # rados.RadosStateError: RADOS rados state (You cannot perform that operation on a Rados object in state shutdown.)
            # Exception ignored in: 'rados.Watch.__dealloc__'
            # Traceback (most recent call last):
            #   File "rados.pyx", line 477, in rados.Rados.require_state
            # rados.RadosStateError: RADOS rados state (You cannot perform that operation on a Rados object in state shutdown.)
            self._config_watch = None
            # Note: https://github.com/ceph/ceph/pull/43107 fixes the
            # above, so we can get rid of this once that lands everywhere.
        if self._ioctx:
            self._ioctx.close()
        if self._cluster:
            self._cluster.shutdown()
        logger.debug("Cluster connection is shut down")
Example 15
def main():
    logging.basicConfig(level=logging.INFO)
    conf = {'keyring': 'keyring.conf'}
    pool = 'single'
    MODE = 'HOST'  # HOST or OSD
    secs = 10 # secs to benchmark
    bytesperobj = 4 * 1024 * 1024
    bigdata = cycle([os.urandom(bytesperobj), os.urandom(bytesperobj)])

    assert MODE in ('HOST', 'OSD')

    log.info('Attaching to CEPH cluster. pool=%s', pool)
    with rados.Rados(conffile='/etc/ceph/ceph.conf', conf=conf) as cluster:
        log.info('Getting map osd -> host.')
        #info = json.loads(subprocess.check_output(['ceph', 'osd', 'tree', '--format=json']).decode('utf-8'))
        info = _cmd(cluster, 'osd tree')
        osd2host = {}
        for i in info['nodes']:
            if i['type'] != 'host':
                continue
            for j in i['children']:
                osd2host[j] = i['name']
        pool_id = cluster.pool_lookup(pool)


        log.info('Getting pg => acting set.')
        #info = json.loads(subprocess.check_output(['ceph', '--format=json', 'pg', 'dump', 'pgs_brief']).decode('utf-8'))
        info = _cmd(cluster, 'pg dump', dumpcontents=['pgs_brief'])


        pgid2acting = {i['pgid']:tuple(i['acting']) for i in info if i['pgid'].startswith(str(pool_id))}
        if MODE == 'HOST':
            bench_items = set(tuple(osd2host[i] for i in osds) for osds in pgid2acting.values())
        else:
            bench_items = set(pgid2acting.values())


        log.info('Figuring out object names for %d %s combinations.', len(bench_items), MODE)
        obj2info = dict()
        cnt = 0
        totlen=len(bench_items)
        while bench_items:
            cnt = cnt + 1
            name = 'bench_%d' % cnt

            #info = json.loads(subprocess.check_output(['ceph', '-f', 'json', 'osd', 'map', pool, name]).decode('utf-8'))
            info = _cmd(cluster, 'osd map', object=name, pool=pool)

            acting = tuple(info['acting'])
            hosts = tuple(osd2host[osd] for osd in acting)

            if MODE == 'HOST':
                bench_item = hosts
            else:
                bench_item = acting

            if bench_item not in bench_items:
                continue

            bench_items.remove(bench_item)
            log.info('Found %d/%d', totlen-len(bench_items), totlen)

            obj2info[name] = (hosts, acting)

        obj2info=dict(sorted(obj2info.items(), key=lambda i: i[1]))

        log.debug('Opening IO context for pool %s.', pool)
        with cluster.open_ioctx(pool) as ioctx:
            log.info('Start benchmarking of %d %ss. %d * 2 seconds each.', len(obj2info), MODE, secs)
            for (name, (hosts, acting)) in obj2info.items():
                log.debug('Benchmarking IOPS on OSD %r (%r)', list(acting), ','.join(hosts))
                delay, ops = do_bench(secs, name, ioctx, cycle([b'q', b'w']))
                iops = ops / delay
                lat = delay / ops # in sec
                log.debug('Benchmarking Linear write on OSD %r (%r) blocksize=%d MiB', list(acting), ','.join(hosts), bytesperobj//(1024*1024))
                delay, ops = do_bench(secs, name, ioctx, bigdata)
                bsec = ops * bytesperobj / delay

                log.info(
                    'OSD %r (%r): %2.2f IOPS, lat=%.4f ms. %2.2f MB/sec (%2.2f Mbit/s).',
                    list(acting),
                     ','.join(hosts),
                    iops,
                    lat * 1000,
                    bsec / 1000000,
                    bsec * 8 / 1000000,
                )
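The benchmark relies on two helpers, _cmd() and do_bench(), that are outside the excerpt. A rough sketch of both, as assumptions about their behaviour (the commented-out subprocess calls above hint at what _cmd replaces):

# Assumed helpers for Example 15 (sketch only).
import json
import time

def _cmd(cluster, prefix, **kwargs):
    # Run a mon command with format=json and return the decoded body.
    argdict = dict(kwargs, prefix=prefix, format='json')
    ret, outbuf, outs = cluster.mon_command(json.dumps(argdict), b'', timeout=30)
    assert ret == 0, outs
    return json.loads(outbuf)

def do_bench(secs, name, ioctx, data):
    # Rewrite one object with successive chunks for roughly `secs` seconds.
    ops = 0
    start = time.monotonic()
    while True:
        ioctx.write_full(name, next(data))
        ops += 1
        elapsed = time.monotonic() - start
        if elapsed >= secs:
            return elapsed, ops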
Example 16
# This script will connect Spark with Ceph
# We can read the data from Ceph to Spark, and
# we also can write data to Ceph from Spark

import rados

# Connect Ceph cluster
try:
    cluster = rados.Rados(
        conffile='/home/cc/SIRIUS-Ceph/docker/configdir/ceph.conf')
except TypeError as e:
    print 'Argument validation error: ', e
    raise e

print "Created cluster handle."

try:
    cluster.connect()
except Exception as e:
    print "connection error: ", e
    raise e
finally:
    print "Connected to the cluster."

# List Ceph pools
print "Available Pools:"
print "----------------"
pools = cluster.list_pools()
for pool in pools:
    print pool
Example 17
from SimpleHTTPServer import SimpleHTTPRequestHandler
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
import DelQueType
import create_vm
import resource_dis
import json
import rados, rbd
import os
import attach_vol
import detach_vol
HandlerClass = SimpleHTTPRequestHandler
ServerClass = HTTPServer
PORT_NUMBER = 80
POOL_NAME="rbd"
HOST_NAME='chitra-HP-Pavilion-g6-Notebook-PC' #todo
radosConn=rados.Rados(conffile='/etc/ceph/ceph.conf')
radosConn.connect()
POOL_NAME="rbd"
if POOL_NAME not in radosConn.list_pools():
	radosConn.create_pool(POOL_NAME)
ioctx = radosConn.open_ioctx(POOL_NAME)
rbdInstance = rbd.RBD()
#VOLUME_LIST={}
def json_out(out):
	  return json.dumps(out,indent=4)
class My_handler(BaseHTTPRequestHandler):
	def do_GET(self):
		print self.path
		url=self.path
		if '/vm/create' in url:
			whattodo = url
Example 18
File: rbd.py Project: wech71/benji
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        if self.parsed_url.username or self.parsed_url.password or self.parsed_url.hostname or self.parsed_url.port \
                    or self.parsed_url.params or self.parsed_url.fragment:
            raise UsageError('The supplied URL {} is invalid.'.format(
                self.url))
        if self.parsed_url.query:
            try:
                extra_ceph_conf = parse_qs(self.parsed_url.query,
                                           keep_blank_values=True,
                                           strict_parsing=True,
                                           errors='strict')
            except (ValueError, UnicodeError) as exception:
                raise UsageError('The supplied URL {} is invalid.'.format(
                    self.url)) from exception

            # parse_qs returns the values as lists, only consider the first appearance of each key in the query string.
            extra_ceph_conf = {
                key: value[0]
                for key, value in extra_ceph_conf.items()
            }
        else:
            extra_ceph_conf = {}

        ceph_config_file = config.get_from_dict(module_configuration,
                                                'cephConfigFile',
                                                types=str)
        if 'client_identifier' in extra_ceph_conf:
            client_identifier = extra_ceph_conf['client_identifier']
            del extra_ceph_conf['client_identifier']
        else:
            client_identifier = config.get_from_dict(module_configuration,
                                                     'clientIdentifier',
                                                     types=str)

        self._cluster = rados.Rados(conffile=ceph_config_file,
                                    rados_id=client_identifier,
                                    conf=extra_ceph_conf)
        self._cluster.connect()
        # create a bitwise or'd list of the configured features
        self._new_image_features = 0
        for feature in config.get_from_dict(module_configuration,
                                            'newImageFeatures',
                                            types=list):
            try:
                self._new_image_features = self._new_image_features | getattr(
                    rbd, feature)
            except AttributeError:
                raise ConfigurationError(
                    '{}: Unknown image feature {}.'.format(
                        module_configuration.full_name, feature))

        self._pool_name = None
        self._image_name = None
        self._snapshot_name = None

        self._simultaneous_reads = config.get_from_dict(module_configuration,
                                                        'simultaneousReads',
                                                        types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration,
                                                         'simultaneousWrites',
                                                         types=int)
        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None
Example 19
    def __init_cluster(self):
        cluster = rados.Rados(rados_id=self.rid, conffile=self.r_conf)
        cluster.connect()
        return cluster
Example 20
import rados, sys

cluster = rados.Rados(conffile='../ceph/ceph-cluster/ceph.conf')

cluster.connect()
if not cluster.pool_exists('data'):
    cluster.create_pool('data')

#WRITING, READING AND REMOVING OBJECTS
ioctx = cluster.open_ioctx('data')

print "\nWriting object 'hw' with contents 'Hello World!' to pool 'data'."
ioctx.write_full("hw", "Hello World!")

print "\n\nContents of object 'hw' \n------------------------\n"
print ioctx.read("hw")

#WRITING AND READING XATTRS

print "\n\nWriting XATTR 'lang' with value 'en_US' to object 'hw'"
ioctx.set_xattr("hw", "lang", "en_US")

print "\n\nGetting XATTR 'lang' from object 'hw'\n"
print ioctx.get_xattr("hw", "lang")

#LISTING OBJECTS
ioctx.write_full("test", "my hello world!")
object_iterator = ioctx.list_objects()

while True:
    try:
        rados_object = next(object_iterator)
        print "Object contents = " + rados_object.read()
    except StopIteration:
        # stop when the iterator is exhausted
        break
Example 21
    def _connect(self):
        """
        Connect to Ceph cluster
        """
        self.cluster = rados.Rados(conffile=self.settings['conf'])
        self.cluster.connect()
Example 22
#!/usr/bin/env python

import cephfs
import rados
import sys

conf_path = "/etc/ceph/ceph.conf"
cluster_name = "ceph"
auth_id = "admin"
rados = rados.Rados(name="client.{0}".format(auth_id),
                    clustername=cluster_name,
                    conffile=conf_path,
                    conf={})
rados.connect()

fs = cephfs.LibCephFS(rados_inst=rados)
fs.init()
fs.mount()

keys = [
    "ceph.dir.layout.pool", "ceph.dir.layout.pool_namespace",
    "ceph.quota.max_bytes", "ceph.quota.max_files"
]

for arg in sys.argv[1:]:
    for key in keys:
        print("print {}@{}".format(key, arg))
        try:
            value = fs.getxattr(arg, key)
            print(type(value), ",", value)
        except cephfs.NoData:
            # the xattr is not set on this path
            pass
Example 23
parser = argparse.ArgumentParser(description='Trigger deep scrubs on the PGs with the oldest deep-scrub timestamps.')
parser.add_argument('--max-scrubs', dest='MAX_SCRUBS', type=int, default=0,
                    help='Maximum number of scrubs to trigger (default: %(default)s)')
parser.add_argument('--sleep', dest='SLEEP', type=int, default=0,
                    help='Sleep this many seconds then run again, looping forever. 0 disables looping. (default: %(default)s)')
parser.add_argument('--conf', dest='CONF', type=str, default="/etc/ceph/ceph.conf",
                    help='Ceph config file. (default: %(default)s)')

args = parser.parse_args()
MAX_SCRUBS = args.MAX_SCRUBS
SLEEP = args.SLEEP
CONF = args.CONF

# Connect to cluster
try:
  cluster = rados.Rados(conffile=CONF)
except TypeError as e:
  print 'Argument validation error: ', e
  raise e

try:
  cluster.connect()
except Exception as e:
  print "connection error: ", e
  raise e

while(True):

  print "Dumping pg info."

  cmd = {'prefix': 'pg dump', 'format': 'json'}
Example 24
def cluster_connect(pool, conffile, rados_id):
    cluster = rados.Rados(conffile=conffile, rados_id=rados_id)
    cluster.connect()
    ioctx = cluster.open_ioctx(pool)
    return ioctx
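A hypothetical call of the helper above; the pool name and rados_id are placeholders:

# Example (illustrative): write one object through the returned ioctx.
ioctx = cluster_connect('rbd', '/etc/ceph/ceph.conf', 'admin')
ioctx.write_full('hello', b'world')
ioctx.close()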
Example 25
def rados_commands(fsid, cluster_name, commands):
    """
    Passing in both fsid and cluster_name, because the caller
    should always know both, and it saves this function the trouble
    of looking up one from the other.
    """

    import rados
    from ceph_argparse import json_command

    # Open a RADOS session
    cluster_handle = rados.Rados(name=RADOS_NAME,
                                 clustername=cluster_name,
                                 conffile='')
    cluster_handle.connect()

    results = []

    # Each command is a 2-tuple of a prefix followed by an argument dictionary
    for i, (prefix, argdict) in enumerate(commands):
        argdict['format'] = 'json'
        if prefix == 'osd setcrushmap':
            ret, stdout, outs = transform_crushmap(argdict['data'], 'set')
            if ret != 0:
                raise RuntimeError(outs)
            ret, outbuf, outs = json_command(cluster_handle,
                                             prefix=prefix,
                                             argdict={},
                                             timeout=RADOS_TIMEOUT,
                                             inbuf=stdout)
        else:
            ret, outbuf, outs = json_command(cluster_handle,
                                             prefix=prefix,
                                             argdict=argdict,
                                             timeout=RADOS_TIMEOUT)
        if ret != 0:
            return {
                'error': True,
                'results': results,
                'error_status': outs,
                'versions': cluster_status(cluster_handle, cluster_name)['versions'],
                'fsid': fsid
            }
        if outbuf:
            results.append(json.loads(outbuf))
        else:
            results.append(None)

    # For all RADOS commands, we include the cluster map versions
    # in the response, so that the caller knows which versions to
    # wait for in order to see the consequences of their actions.
    # TODO: not all commands will require version info on completion, consider making
    # this optional.
    # TODO: we should endeavor to return something clean even if we can't talk to RADOS
    # enough to get version info
    versions = cluster_status(cluster_handle, cluster_name)['versions']

    # Success
    return {
        'error': False,
        'results': results,
        'error_status': '',
        'versions': versions,
        'fsid': fsid
    }
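
A hedged sketch of how rados_commands() might be called; the fsid and the command tuples below are invented for illustration, and the function itself depends on module-level names (RADOS_NAME, RADOS_TIMEOUT, cluster_status, transform_crushmap) defined elsewhere in its source.

# Hypothetical invocation; the fsid and commands are illustrative only.
commands = [
    ('osd pool create', {'pool': 'testpool', 'pg_num': 64}),  # (prefix, argdict) pairs
    ('osd pool get', {'pool': 'testpool', 'var': 'pg_num'}),
]
response = rados_commands('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', 'ceph', commands)
if response['error']:
    print('command failed:', response['error_status'])
else:
    print('results:', response['results'])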
Example no. 26
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import rados

# This utility will be enhanced as needed. It will need to be run from a Ceph node.

# Uses the default config search path; alternatively:
#   rados.Rados(conffile='/etc/ceph/ceph.conf',
#               conf=dict(keyring='/etc/ceph/ceph.client.admin.keyring'))
cluster = rados.Rados(conffile='')
cluster.connect()

print("Ceph (librados) version: " + str(cluster.version()))

cluster.create_pool('test-me')
print "\nCeph pools:"
pools = cluster.list_pools()
for pool in pools:
    print(pool)

cluster_stats = cluster.get_cluster_stats()

print "\nCluster stats:"
for key, value in cluster_stats.iteritems():
Example no. 27
def get_heartbeats():
    """
    The goal here is *not* to give a helpful summary of
    the cluster status, rather it is to give the minimum
    amount of information to let an informed master decide
    whether it needs to ask us for any additional information,
    such as updated copies of the cluster maps.

    Enumerate Ceph services running locally, for each report
    its FSID, type and ID.

    If a mon is running here, do some extra work:

    - Report the mapping of cluster name to FSID from /etc/ceph/<cluster name>.conf
    - For all clusters, report the latest versions of all cluster maps.

    :return: A 2-tuple of dicts for services, clusters

    """

    try:
        import rados
    except ImportError:
        # Ceph isn't installed, report no services or clusters
        server_heartbeat = {
            'services': {},
            'boot_time': get_boot_time(),
            'ceph_version': None
        }
        return server_heartbeat, {}

    # Map of FSID string to admin socket path string
    mon_sockets = {}
    # FSID string to cluster name string
    fsid_names = {}
    # Service name to service dict
    services = {}

    # For each admin socket, try to interrogate the service
    for filename in glob("/var/run/ceph/*.asok"):
        try:
            service_data = service_status(filename)
        except (rados.Error, MonitoringError):
            # Failed to get info for this service, stale socket or unresponsive,
            # exclude it from report
            pass
        else:
            if not service_data:
                continue

            service_name = "%s-%s.%s" % (service_data['cluster'],
                                         service_data['type'],
                                         service_data['id'])

            services[service_name] = service_data
            fsid_names[service_data['fsid']] = service_data['cluster']

            if service_data['type'] == 'mon' and service_data['status'][
                    'rank'] in service_data['status']['quorum']:
                # A mon in quorum is eligible to emit a cluster heartbeat
                mon_sockets[service_data['fsid']] = filename

    # Installed Ceph version (as opposed to the per-service running Ceph version)
    ceph_version_str = __salt__['pkg.version']('ceph')  # noqa
    if ceph_version_str:
        ceph_version = ceph_version_str
    else:
        ceph_version = None

    # For each ceph cluster with an in-quorum mon on this node, interrogate the cluster
    cluster_heartbeat = {}
    for fsid, socket_path in mon_sockets.items():
        try:
            cluster_handle = rados.Rados(name=RADOS_NAME,
                                         clustername=fsid_names[fsid],
                                         conffile='')
            cluster_handle.connect()
            cluster_heartbeat[fsid] = cluster_status(cluster_handle,
                                                     fsid_names[fsid])
        except (rados.Error, MonitoringError):
            # Something went wrong getting data for this cluster, exclude it from our report
            pass

    server_heartbeat = {
        'services': services,
        'boot_time': get_boot_time(),
        'ceph_version': ceph_version
    }

    return server_heartbeat, cluster_heartbeat
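
A hedged usage sketch; get_heartbeats() is normally invoked by the monitoring master rather than called standalone, so this snippet is for illustration only, and it assumes the cluster_status() result is a dict (only its 'versions' key is visible in these examples).

# Hypothetical standalone call to inspect the two return values.
server_heartbeat, cluster_heartbeat = get_heartbeats()
print("local services:", sorted(server_heartbeat['services']))
print("installed ceph version:", server_heartbeat['ceph_version'])
for fsid, status in cluster_heartbeat.items():
    print("cluster", fsid, "reported", len(status), "status fields")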
Example no. 28
#!/usr/bin/python
#########################################################################
# Deletes a list of files in the form:
# datadisk/rucio/mc15_14TeV/24/3d/HITS.09709777._000863.pool.root.1
#
#
#########################################################################

import rados, sys, math

# Open connection to ATLAS pool
cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
cluster.connect()
ioctx = cluster.open_ioctx('atlas')

# Open file
with open(sys.argv[1]) as f:
    for line in f:
        filename = line.strip()
        chunk0 = filename + '.0000000000000000'

        # Get filesize from first chunk
        try:
            filesize = int(ioctx.get_xattr(chunk0, "striper.size"))
            numberchunks = math.ceil(filesize / 67108864)  # 64 MiB stripe units
            print(filename + " is made up of " + str(numberchunks) + " chunks")
        except rados.Error:
            print(chunk0 + " does not have metadata.  Trying force delete")
            try:
                ioctx.remove_object(chunk0)
            except rados.Error:
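
The example is cut off at this point. As a standalone illustration, a minimal sketch of the per-chunk deletion loop that presumably follows; the zero-padded 16-hex-digit suffix pattern for later chunks is an assumption based on chunk0 above, and the names reuse the variables from the truncated loop.

# Hypothetical continuation: delete every chunk of the striped object.
# The zero-padded hexadecimal suffix format is an assumption.
for i in range(int(numberchunks)):
    chunk_name = filename + '.' + format(i, '016x')
    try:
        ioctx.remove_object(chunk_name)
    except rados.ObjectNotFound:
        print(chunk_name + " already gone")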
Example no. 29
def api_setup(app, conf, cluster, clientname, clientid, args):
    '''
    This is done globally, and cluster connection kept open for
    the lifetime of the daemon.  librados should assure that even
    if the cluster goes away and comes back, our connection remains.

    Initialize the running instance.  Open the cluster, get the command
    signatures, module, perms, and help; stuff them away in the app.ceph_urls
    dict.  Also save app.ceph_sigdict for help() handling.
    '''
    def get_command_descriptions(cluster, target=('mon', '')):
        ret, outbuf, outs = json_command(cluster, target,
                                         prefix='get_command_descriptions',
                                         timeout=30)
        if ret:
            err = "Can't get command descriptions: {0}".format(outs)
            app.logger.error(err)
            raise EnvironmentError(ret, err)

        try:
            sigdict = parse_json_funcsigs(outbuf, 'rest')
        except Exception as e:
            err = "Can't parse command descriptions: {}".format(e)
            app.logger.error(err)
            raise EnvironmentError(err)
        return sigdict

    app.ceph_cluster = cluster or 'ceph'
    app.ceph_urls = {}
    app.ceph_sigdict = {}
    app.ceph_baseurl = ''

    conf = conf or ''
    cluster = cluster or 'ceph'
    clientid = clientid or DEFAULT_ID
    clientname = clientname or 'client.' + clientid

    # Added a mechanism to make the ceph-rest-api retry until it is able to
    # connect to the Ceph cluster. Originally, it would try once and then
    # the process would die.
    connected = False
    while not connected:
        app.ceph_cluster = rados.Rados(name=clientname, conffile=conf)
        app.ceph_cluster.conf_parse_argv(args)

        while True:
            try:
                # Only block for 20 seconds waiting to connect.
                app.ceph_cluster.connect(timeout=20)
            except rados.InProgress:
                # Wait before retrying. If the connection is in-progress, the
                # connect returns immediately.
                print "Connection attempt in progress - re-trying"
                time.sleep(10)
                continue
            except rados.TimedOut:
                print "Connection TimedOut - shutting down and re-trying"
                app.ceph_cluster.shutdown()
                break
            except Exception as e:
                # Keep trying to connect until successful
                print "Exception from connect: {}".format(e)
                time.sleep(10)
                continue

            connected = True
            break

    app.ceph_baseurl = app.ceph_cluster.conf_get('restapi_base_url') \
        or DEFAULT_BASEURL
    if app.ceph_baseurl.endswith('/'):
        app.ceph_baseurl = app.ceph_baseurl[:-1]
    addr = app.ceph_cluster.conf_get('public_addr') or DEFAULT_ADDR

    # remove any nonce from the conf value
    addr = addr.split('/')[0]
    addr, port = addr.rsplit(':', 1)
    addr = addr or DEFAULT_ADDR
    port = port or DEFAULT_PORT
    port = int(port)

    loglevel = app.ceph_cluster.conf_get('restapi_log_level') \
        or DEFAULT_LOG_LEVEL
    # ceph has a default log file for daemons only; clients (like this)
    # default to "".  Override that for this particular client.
    logfile = app.ceph_cluster.conf_get('log_file')
    if not logfile:
        logfile = os.path.join(
            DEFAULT_LOGDIR,
            '{cluster}-{clientname}.{pid}.log'.format(
                cluster=cluster,
                clientname=clientname,
                pid=os.getpid()
            )
        )
    app.logger.addHandler(logging.handlers.WatchedFileHandler(logfile))
    app.logger.setLevel(LOGLEVELS[loglevel.lower()])
    for h in app.logger.handlers:
        h.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s',
            '%FT%T'))

    app.ceph_sigdict = get_command_descriptions(app.ceph_cluster)

    osdid = find_up_osd(app)
    if osdid is not None:
        osd_sigdict = get_command_descriptions(app.ceph_cluster,
                                               target=('osd', int(osdid)))

        # shift osd_sigdict keys up to fit at the end of the mon's app.ceph_sigdict
        maxkey = sorted(app.ceph_sigdict.keys())[-1]
        maxkey = int(maxkey.replace('cmd', ''))
        osdkey = maxkey + 1
        for k, v in osd_sigdict.items():
            newv = v
            newv['flavor'] = 'tell'
            globk = 'cmd' + str(osdkey)
            app.ceph_sigdict[globk] = newv
            osdkey += 1

    # app.ceph_sigdict maps "cmdNNN" to a dict containing:
    # 'sig', an array of argdescs
    # 'help', the helptext
    # 'module', the Ceph module this command relates to
    # 'perm', a 'rwx*' string representing required permissions, and also
    #    a hint as to whether this is a GET or POST/PUT operation
    # 'avail', a comma-separated list of strings of consumers that should
    #    display this command (filtered by parse_json_funcsigs() above)
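    # For orientation, one hypothetical entry might look like this (illustrative
    # values only, not taken from a real get_command_descriptions() response):
    #   'cmd000': {'sig': [...], 'help': 'show cluster status',
    #              'module': 'mon', 'perm': 'r', 'avail': 'cli,rest'}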
    app.ceph_urls = {}
    for cmdnum, cmddict in app.ceph_sigdict.items():
        cmdsig = cmddict['sig']
        flavor = cmddict.get('flavor', 'mon')
        url, params = generate_url_and_params(app, cmdsig, flavor)
        perm = cmddict['perm']
        for k in METHOD_DICT:
            if k in perm:
                methods = METHOD_DICT[k]
        urldict = {'paramsig': params,
                   'help': cmddict['help'],
                   'module': cmddict['module'],
                   'perm': perm,
                   'flavor': flavor,
                   'methods': methods, }

        # app.ceph_urls contains a list of urldicts (usually only one long)
        if url not in app.ceph_urls:
            app.ceph_urls[url] = [urldict]
        else:
            # If more than one, need to make union of methods of all.
            # Method must be checked in handler
            methodset = set(methods)
            for old_urldict in app.ceph_urls[url]:
                methodset |= set(old_urldict['methods'])
            methods = list(methodset)
            app.ceph_urls[url].append(urldict)

        # add, or re-add, rule with all methods and urldicts
        app.add_url_rule(url, url, handler, methods=methods)
        url += '.<fmt>'
        app.add_url_rule(url, url, handler, methods=methods)

    app.logger.debug("urls added: %d", len(app.ceph_urls))

    app.add_url_rule('/<path:catchall_path>', '/<path:catchall_path>',
                     handler, methods=['GET', 'PUT'])
    return addr, port
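
A hedged sketch of how api_setup() might be wired into a Flask app; ceph-rest-api builds its own app object and argument list, so the values below are placeholders rather than the daemon's actual startup code.

# Hypothetical wiring; the argument values are placeholders.
import flask

app = flask.Flask(__name__)
addr, port = api_setup(app, conf='/etc/ceph/ceph.conf', cluster='ceph',
                       clientname='client.restapi', clientid='restapi', args=[])
app.run(host=addr, port=port)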
Example no. 30
    def define_luns(logger, config, target):
        """
        define the disks in the config to LIO and map to a LUN
        :param logger: logger object to print to
        :param config: configuration dict from the rados pool
        :param target: (object) gateway object - used for mapping
        :raises: CephiSCSIError
        """

        ips = ip_addresses()
        local_gw = this_host()

        target_disks = config.config["targets"][target.iqn]['disks']
        if not target_disks:
            logger.info("No LUNs to export")
            return

        disks = {}
        for disk in target_disks:
            disks[disk] = config.config['disks'][disk]

        # sort the disks dict keys, so the disks are registered in a specific
        # sequence
        srtd_disks = sorted(disks)
        pools = {disks[disk_key]['pool'] for disk_key in srtd_disks}

        with rados.Rados(conffile=settings.config.cephconf,
                         name=settings.config.cluster_client_name) as cluster:

            for pool in pools:

                logger.debug("Processing rbd's in '{}' pool".format(pool))

                with cluster.open_ioctx(pool) as ioctx:

                    pool_disks = [disk_key for disk_key in srtd_disks
                                  if disk_key.startswith(pool + '/')]
                    for disk_key in pool_disks:

                        pool, image_name = disk_key.split('/')
                        with rbd.Image(ioctx, image_name) as rbd_image:

                            disk_config = config.config['disks'][disk_key]
                            backstore = disk_config['backstore']
                            backstore_object_name = disk_config['backstore_object_name']

                            lun = LUN(logger, pool, image_name,
                                      rbd_image.size(), local_gw, backstore,
                                      backstore_object_name)

                            if lun.error:
                                raise CephiSCSIError("Error defining rbd image {}"
                                                     .format(disk_key))

                            so = lun.allocate()
                            if lun.error:
                                raise CephiSCSIError("Unable to register {} "
                                                     "with LIO: {}"
                                                     .format(disk_key,
                                                             lun.error_msg))

                            # If not in use by another target on this gw
                            # clean up stale locks.
                            if so.status != 'activated':
                                RBDDev.rbd_lock_cleanup(logger, ips,
                                                        rbd_image)

                            target._map_lun(config, so)
                            if target.error:
                                raise CephiSCSIError("Mapping for {} failed: {}"
                                                     .format(disk_key,
                                                             target.error_msg))