Exemple #1
0
 def get_device_health(self, device_id):
     """Read the omap of object ``device_id`` and return it JSON-decoded.

     Every omap value is parsed with ``json.loads``.  A value that cannot
     be parsed is logged as an error and recorded as ``None`` under its
     key, so the key is still present in the result.

     :param device_id: name of the object whose omap is read
     :return: dict mapping each omap key to its decoded value; ``{}`` if
         an ``IOError`` escapes the connection block
     """
     health = {}
     try:
         with self._open_connection() as ioctx, rados.ReadOpCtx() as op:
             # Request at most 500 entries, no start key, no prefix filter.
             omap_iter, ret = ioctx.get_omap_vals(op, '', '', 500)
             assert ret == 0
             try:
                 ioctx.operate_read_op(op, device_id)
                 for key, value in list(omap_iter):
                     try:
                         parsed = json.loads(value)
                     except ValueError:
                         parsed = None
                         self.module.log.error(
                             'unable to parse value for %s: "%s"' %
                             (key, value))
                     health[key] = parsed
             except IOError:
                 # Read failures are ignored; whatever was collected so
                 # far is returned.
                 pass
             except OSError as e:
                 self.module.log.error(
                     'unable to get device {} health, {}'.format(
                         device_id, str(e)))
     except IOError:
         return {}
     return health
Exemple #2
0
 def _get_device_metrics(self, devid, sample=None, min_sample=None):
     """Return the stored metric samples of device ``devid``.

     The omap of the object named ``devid`` is read and each value is
     decoded from JSON.  Entries whose value cannot be decoded are logged
     at debug level and left out of the result.

     :param devid: name of the object to read
     :param sample: when set, used as the omap filter prefix; iteration
         stops at the first key that is not exactly this value
     :param min_sample: when set, used as the omap ``start_after`` key;
         iteration stops at the first key ordered before it
     :return: dict mapping omap key to decoded value.  Empty when no
         connection is available or the object does not exist.
     :raises rados.Error: on a RADOS failure other than a missing object
     """
     res = {}
     ioctx = self.open_connection(create_if_missing=False)
     if not ioctx:
         return {}
     with ioctx:
         with rados.ReadOpCtx() as op:
             omap_iter, ret = ioctx.get_omap_vals(op, min_sample or '', sample or '',
                                                  MAX_SAMPLES)  # fixme
             assert ret == 0
             try:
                 ioctx.operate_read_op(op, devid)
                 for key, value in list(omap_iter):
                     if sample and key != sample:
                         break
                     if min_sample and key < min_sample:
                         break
                     try:
                         v = json.loads(value)
                     except (ValueError, IndexError):
                         self.log.debug('unable to parse value for %s: "%s"' %
                                        (key, value))
                         # Skip the entry: storing `v` here would either
                         # raise (first entry) or silently reuse the value
                         # decoded for the previous key.
                         continue
                     res[key] = v
             except rados.ObjectNotFound:
                 # No object means no samples were ever stored.
                 pass
             except rados.Error as e:
                 self.log.exception("RADOS error reading omap: {0}".format(e))
                 raise
     return res
Exemple #3
0
    def show_device_metrics(self, devid, sample):
        """Return the stored metrics of ``devid`` as a command result.

        :param devid: device id; also the name of the object that is read
        :param sample: when truthy, used as the omap filter prefix and only
            the key equal to it is returned
        :return: ``(retval, stdout, stderr)`` tuple.  ``-errno.ENOENT`` with
            an error message when the device is unknown, otherwise ``0``
            with the metrics as indented JSON (``{}`` when there is no
            connection or no object).
        :raises rados.Error: on a RADOS failure other than a missing object
        """
        # verify device exists
        r = self.get("device " + devid)
        if not r or 'device' not in r.keys():
            return -errno.ENOENT, '', 'device ' + devid + ' not found'
        # fetch metrics
        res = {}
        ioctx = self.open_connection(create_if_missing=False)
        if ioctx:
            # Close the I/O context when done instead of leaking it.
            with ioctx, rados.ReadOpCtx() as op:
                omap_iter, ret = ioctx.get_omap_vals(op, "", sample or '',
                                                     500)  # fixme
                assert ret == 0
                try:
                    ioctx.operate_read_op(op, devid)
                    for key, value in list(omap_iter):
                        if sample and key != sample:
                            break
                        try:
                            v = json.loads(value)
                        except (ValueError, IndexError):
                            self.log.debug(
                                'unable to parse value for %s: "%s"' %
                                (key, value))
                            # Skip the entry: `v` is unbound or still holds
                            # the value decoded for the previous key.
                            continue
                        res[key] = v
                except rados.ObjectNotFound:
                    pass
                except rados.Error as e:
                    self.log.exception(
                        "RADOS error reading omap: {0}".format(e))
                    raise

        return 0, json.dumps(res, indent=4), ''
Exemple #4
0
 def put_device_metrics(self, ioctx, devid, data):
     """Store ``data`` as a new sample for ``devid`` and prune old samples.

     Existing omap keys of object ``devid`` that sort before the retention
     cut-off are removed in the same write operation that adds the new
     sample.  Pruning is best effort: if the existing keys cannot be
     listed, the new sample is still written.

     :param ioctx: open I/O context used for the read and the write
     :param devid: name of the object holding the samples
     :param data: JSON-serialisable sample, stored under the current time
         formatted with ``TIME_FORMAT``
     """
     old_key = datetime.now() - timedelta(
         seconds=int(self.retention_period))
     prune = old_key.strftime(TIME_FORMAT)
     self.log.debug('put_device_metrics device %s prune %s' %
                    (devid, prune))
     erase = []
     try:
         with rados.ReadOpCtx() as op:
             omap_iter, ret = ioctx.get_omap_keys(op, "", 500)  # fixme
             assert ret == 0
             ioctx.operate_read_op(op, devid)
             for key, _ in list(omap_iter):
                 if key >= prune:
                     break
                 erase.append(key)
     except rados.ObjectNotFound:
         # First sample for this device: nothing to prune yet.
         pass
     except Exception as e:
         # Keep the original best-effort behaviour (still write the new
         # sample) but leave a trace instead of hiding the failure, and
         # no longer swallow KeyboardInterrupt/SystemExit.
         self.log.warning(
             'put_device_metrics device %s unable to list old keys: %s' %
             (devid, e))
     key = datetime.now().strftime(TIME_FORMAT)
     self.log.debug('put_device_metrics device %s key %s = %s, erase %s' %
                    (devid, key, data, erase))
     with rados.WriteOpCtx() as op:
         ioctx.set_omap(op, (key, ), (str(json.dumps(data)), ))
         if erase:
             ioctx.remove_omap_keys(op, tuple(erase))
         ioctx.operate_write_op(op, devid)
Exemple #5
0
    def load_task_queue(self, ioctx, pool_name):
        """Load every task stored in the omap of ``RBD_TASK_OID``.

        The omap is read in pages of up to 128 entries.  Each value is
        decoded, turned into a ``Task`` and handed to ``append_task``;
        values that raise ``ValueError`` are logged and skipped.  A missing
        object is not an error.

        :param ioctx: I/O context to read from; its namespace, if any, is
            appended to the pool name in log messages
        :param pool_name: pool name used in log messages
        """
        if ioctx.nspace:
            pool_spec = pool_name + "/{}".format(ioctx.nspace)
        else:
            pool_spec = pool_name

        start_after = ''
        try:
            has_more = True
            while has_more:
                with rados.ReadOpCtx() as read_op:
                    self.log.info("load_task_task: {}, start_after={}".format(
                        pool_spec, start_after))
                    omap_iter, _ = ioctx.get_omap_vals(
                        read_op, start_after, "", 128)
                    ioctx.operate_read_op(read_op, RBD_TASK_OID)

                    entries = list(omap_iter)
                    for key, raw in entries:
                        # Remember the last key seen so the next page
                        # resumes after it.
                        start_after = key
                        task_json = raw.decode()
                        self.log.info(
                            "load_task_task: task={}".format(task_json))

                        try:
                            self.append_task(Task.from_json(task_json))
                        except ValueError:
                            self.log.error("Failed to decode task: pool_spec={}, task={}".format(pool_spec, task_json))

                    # An empty page means the omap has been exhausted.
                    has_more = bool(entries)

        except StopIteration:
            pass
        except rados.ObjectNotFound:
            # rbd_task DNE
            pass
Exemple #6
0
 def show_device_metrics(self, devid, sample):
     """Return the stored metrics of ``devid`` as a command result.

     Reading is best effort: a missing object yields an empty result and
     any other read failure is logged, with whatever was collected so far
     still being returned.

     :param devid: device id; also the name of the object that is read
     :param sample: when truthy, used as the omap filter prefix and only
         the key equal to it is returned
     :return: ``(retval, stdout, stderr)`` tuple.  ``-errno.ENOENT`` with
         an error message when the device is unknown, otherwise ``0``
         with the metrics as indented JSON.
     """
     # verify device exists
     r = self.get("device " + devid)
     if not r or 'device' not in r.keys():
         return (-errno.ENOENT, '', 'device ' + devid + ' not found')
     # fetch metrics
     ioctx = self.open_connection()
     res = {}
     with rados.ReadOpCtx() as op:
         omap_iter, ret = ioctx.get_omap_vals(op, "", sample or '', 500)  # fixme
         assert ret == 0
         try:
             ioctx.operate_read_op(op, devid)
             for key, value in list(omap_iter):
                 if sample and key != sample:
                     break
                 try:
                     v = json.loads(value)
                 except (ValueError, TypeError):
                     self.log.debug('unable to parse value for %s: "%s"' %
                                    (key, value))
                     # Skip only this entry.  Previously the stale or
                     # unbound `v` was stored, and the resulting error
                     # silently ended the whole loop.
                     continue
                 res[key] = v
         except rados.ObjectNotFound:
             # No object means no samples were ever stored.
             pass
         except Exception as e:
             # Still best effort, but no longer silent and no longer
             # catching KeyboardInterrupt/SystemExit.
             self.log.exception("error reading omap: {0}".format(e))
     return 0, json.dumps(res, indent=4), ''
Exemple #7
0
    def _get(self, key: str) -> Optional[str]:
        """Return the value stored for ``key``, or ``None``.

        When an I/O context is available the key is first looked up in the
        omap of the ``kvstore`` object and the local cache is refreshed
        from it: a found value overwrites the cached one, a missing key is
        dropped from the cache.  Any failure of that lookup is logged and
        the cache is left untouched.

        The result always comes from the local cache, so if the cluster
        cannot be reached the last cached value is returned.  That value
        can be arbitrarily stale when another instance updated the key in
        the meantime.

        :param key: key to look up
        :return: the cached value decoded as UTF-8, or ``None`` when the
            cache has no (or an empty) value for ``key``
        """
        if self._ioctx:
            try:
                with rados.ReadOpCtx() as op:
                    omap_iter, ret = self._ioctx.get_omap_vals_by_keys(
                        op, (key, ))
                    assert ret == 0  # ???
                    # TODO: does this need to be async?
                    self._ioctx.operate_read_op(op, "kvstore")
                    fetched = dict(omap_iter)
                    if key in fetched:
                        self._db[key] = fetched[key]
                    elif key in self._db:
                        # Gone from the cluster kvstore: drop it locally
                        # as well so the cache does not serve a stale
                        # entry.
                        del self._db[key]

            except Exception as e:
                logger.exception(str(e))

        cached = self._db.get(key)
        return cached.decode("utf-8") if cached else None
Exemple #8
0
def load_dir_map(ioctx):
    """Load the directory mappings stored in the omap of ``MIRROR_OBJECT_NAME``.

    Entries whose key starts with ``DIRECTORY_MAP_PREFIX`` are fetched in
    pages of up to ``MAX_RETURN`` and passed to ``handle_dir_load``, which
    fills the returned mapping.

    :param ioctx: I/O context to read from
    :return: the accumulated directory mapping
    :raises Exception: carrying a negated errno, when the omap request
        cannot be set up or a ``rados.Error`` occurs
    """
    dir_mapping = {}  # type: Dict[str, Dict]
    log.info('loading dir map...')
    try:
        with rados.ReadOpCtx() as read_op:
            start = ""
            while True:
                omap_iter, ret = ioctx.get_omap_vals(
                    read_op, start, DIRECTORY_MAP_PREFIX, MAX_RETURN)
                if ret != 0:
                    log.error('failed to fetch dir mapping omap')
                    raise Exception(-errno.EINVAL)
                ioctx.operate_read_op(read_op, MIRROR_OBJECT_NAME)
                page = dict(omap_iter)
                if not page:
                    break
                handle_dir_load(dir_mapping, page)
                # Resume the next page after the last key of this one.
                start, _ = page.popitem()
        log.info("loaded {0} directory mapping(s) from disk".format(
            len(dir_mapping)))
        return dir_mapping
    except rados.Error as e:
        log.error(f'exception when loading directory mapping: {e}')
        raise Exception(-e.errno)
Exemple #9
0
 def list_quota_kvs():
     """Return all omap entries of the quota object.

     Connects to the cluster described by ``_CLUSTER_CONFFILE``, opens the
     first pool whose name contains ``META_POOL_KEY`` and reads up to
     ``MAX_NORMAL_QUOTA_COUNT + MAX_DEFAULT_QUOTA_COUNT`` omap entries of
     ``QUOTA_OBJ_NAME``.

     :return: list of ``(key, value)`` omap entries
     :raises rados.ObjectNotFound: when the quota object does not exist
     :raises rados.TimedOut: when the read times out
     :raises Exception: any other failure; a message is printed before
         every re-raise
     """
     omap_list = []
     start_after = ''
     filter_prefix = ''
     try:
         with rados.Rados(conffile=_CLUSTER_CONFFILE) as cluster:
             pool_list = cluster.list_pools()
             meta_pool = [p for p in pool_list if META_POOL_KEY in p][0]
             with cluster.open_ioctx(meta_pool) as ioctx:
                 with rados.ReadOpCtx(ioctx) as read_op:
                     ret = ioctx.get_omap_vals(read_op, start_after, filter_prefix,
                                               MAX_NORMAL_QUOTA_COUNT + MAX_DEFAULT_QUOTA_COUNT)
                     ioctx.operate_read_op(read_op, QUOTA_OBJ_NAME)
                     omap_list = list(ret[0])
         return omap_list
     except rados.ObjectNotFound:
         print('there is no quota object: {}'.format(QUOTA_OBJ_NAME))
         # Bare `raise`: no name is bound in this handler, so `raise e`
         # failed with UnboundLocalError and hid the real error.
         raise
     except rados.TimedOut:
         print('read omap timed out')
         raise
     except Exception as e:
         print('caught exception with message: {}'.format(e))
         raise
Exemple #10
0
    def put_device_metrics(self, ioctx, devid, data):
        """Store ``data`` as a new sample for ``devid`` and prune old samples.

        Omap keys of object ``devid`` that sort before the retention
        cut-off are removed in the same write operation that adds the new
        sample.  Nothing is written when listing the existing keys fails
        with a RADOS error other than "object not found".

        :param ioctx: open I/O context used for the read and the write
        :param devid: name of the object holding the samples
        :param data: JSON-serialisable sample, stored under the current UTC
            time formatted with ``TIME_FORMAT``
        """
        retention = timedelta(seconds=int(self.retention_period))
        prune = (datetime.utcnow() - retention).strftime(TIME_FORMAT)
        self.log.debug('put_device_metrics device %s prune %s' %
                       (devid, prune))

        erase = []
        try:
            with rados.ReadOpCtx() as read_op:
                omap_iter, ret = ioctx.get_omap_keys(read_op, "", 500)  # fixme
                assert ret == 0
                ioctx.operate_read_op(read_op, devid)
                for stored_key, _ in list(omap_iter):
                    # Stop collecting at the first key that is not older
                    # than the cut-off.
                    if stored_key >= prune:
                        break
                    erase.append(stored_key)
        except rados.ObjectNotFound:
            # The object doesn't already exist, no problem.
            pass
        except rados.Error as e:
            # Do not proceed with writes if something unexpected
            # went wrong with the reads.
            self.log.exception("Error reading OMAP: {0}".format(e))
            return

        key = datetime.utcnow().strftime(TIME_FORMAT)
        self.log.debug('put_device_metrics device %s key %s = %s, erase %s' %
                       (devid, key, data, erase))
        with rados.WriteOpCtx() as write_op:
            ioctx.set_omap(write_op, (key, ), (str(json.dumps(data)), ))
            if erase:
                ioctx.remove_omap_keys(write_op, tuple(erase))
            ioctx.operate_write_op(write_op, devid)
Exemple #11
0
def count_threads_in_omap(omap_obj):
  """Return the number of omap entries of object ``omap_obj``.

  Uses the module-level ``ioctx``; when the module-level ``debug`` flag is
  set, every key is printed while counting.
  """
  with rados.ReadOpCtx() as read_op:
    omap_iter, _ = ioctx.get_omap_vals(read_op, "", "", -1)
    ioctx.operate_read_op(read_op, omap_obj)
    # the result is an iterator, so len() is not available: walk it
    total = 0
    for key, _value in omap_iter:
      if debug:
        print('in omap for %s: %s' % (omap_obj, key))
      total += 1
  return total
Exemple #12
0
    def load_from_pool(self, ioctx: rados.Ioctx,
                       namespace_validator: Optional[Callable],
                       image_validator: Optional[Callable]) -> None:
        """Load all schedules stored in the pool's schedule object.

        The omap of ``self.handler.SCHEDULE_OID`` is read in pages of up to
        128 entries.  Each key is resolved to a level spec and each value
        parsed as a schedule; both are recorded on ``self``.  Keys that no
        longer resolve are treated as stale and removed from the omap
        afterwards.  A missing object is not an error.

        :param ioctx: I/O context of the pool to load from
        :param namespace_validator: forwarded to ``LevelSpec.from_id``
        :param image_validator: forwarded to ``LevelSpec.from_id``
        """
        pool_id = ioctx.get_pool_id()
        pool_name = ioctx.get_pool_name()
        stale_keys = []
        start_after = ''
        try:
            has_more = True
            while has_more:
                with rados.ReadOpCtx() as read_op:
                    self.handler.log.info(
                        "load_schedules: {}, start_after={}".format(
                            pool_name, start_after))
                    omap_iter, _ = ioctx.get_omap_vals(
                        read_op, start_after, "", 128)
                    ioctx.operate_read_op(read_op, self.handler.SCHEDULE_OID)

                    entries = list(omap_iter)
                    for key, raw in entries:
                        # Remember the last key seen so the next page
                        # resumes after it.
                        start_after = key
                        value = raw.decode()
                        self.handler.log.info("load_schedule: {} {}".format(
                            key, value))
                        try:
                            try:
                                level_spec = LevelSpec.from_id(
                                    self.handler, key, namespace_validator,
                                    image_validator)
                            except ValueError:
                                self.handler.log.debug(
                                    "Stale schedule key %s in pool %s", key,
                                    pool_name)
                                stale_keys.append(key)
                                continue

                            # The level spec is recorded before the
                            # schedule is parsed, so it stays registered
                            # even when decoding the schedule fails.
                            self.level_specs[level_spec.id] = level_spec
                            self.schedules[level_spec.id] = \
                                Schedule.from_json(level_spec.name, value)
                        except ValueError:
                            self.handler.log.error(
                                "Failed to decode schedule: pool={}, {} {}".
                                format(pool_name, key, value))

                    # An empty page means the omap has been exhausted.
                    has_more = bool(entries)

        except StopIteration:
            pass
        except rados.ObjectNotFound:
            pass

        if stale_keys:
            with rados.WriteOpCtx() as write_op:
                ioctx.remove_omap_keys(write_op, stale_keys)
                ioctx.operate_write_op(write_op, self.handler.SCHEDULE_OID)
Exemple #13
0
    def put_device_metrics(self, ioctx: rados.Ioctx, devid: str, data: Any) -> None:
        """Store a new sample for ``devid``, prune old ones, update wear level.

        Omap keys of object ``devid`` that sort before the retention
        cut-off are removed in the same write operation that adds the new
        sample.  Nothing is written when listing the existing keys fails
        with a RADOS error other than "object not found".

        Afterwards a wear level is extracted from ``data`` (ATA first, then
        NVMe) and reported through ``set_device_wear_level`` when it
        differs from the recorded one; if none can be extracted but one is
        recorded, it is reset to ``-1.0``.

        :param ioctx: open I/O context used for the read and the write
        :param devid: non-empty name of the object holding the samples
        :param data: JSON-serialisable sample, stored under the current UTC
            time formatted with ``TIME_FORMAT``
        """
        assert devid
        old_key = datetime.utcnow() - timedelta(
            seconds=self.retention_period)
        prune = old_key.strftime(TIME_FORMAT)
        self.log.debug('put_device_metrics device %s prune %s' %
                       (devid, prune))
        erase = []
        try:
            with rados.ReadOpCtx() as op:
                # FIXME
                omap_iter, ret = ioctx.get_omap_keys(op, "", MAX_SAMPLES)
                assert ret == 0
                ioctx.operate_read_op(op, devid)
                for key, _ in list(omap_iter):
                    if key >= prune:
                        break
                    erase.append(key)
        except rados.ObjectNotFound:
            # The object doesn't already exist, no problem.
            pass
        except rados.Error as e:
            # Do not proceed with writes if something unexpected
            # went wrong with the reads.
            self.log.exception("Error reading OMAP: {0}".format(e))
            return

        key = datetime.utcnow().strftime(TIME_FORMAT)
        self.log.debug('put_device_metrics device %s key %s = %s, erase %s' %
                       (devid, key, data, erase))
        with rados.WriteOpCtx() as op:
            ioctx.set_omap(op, (key,), (str(json.dumps(data)),))
            if erase:
                ioctx.remove_omap_keys(op, tuple(erase))
            ioctx.operate_write_op(op, devid)

        # extract wear level?
        wear_level = get_ata_wear_level(data)
        if wear_level is None:
            wear_level = get_nvme_wear_level(data)
        dev_data = self.get(f"device {devid}") or {}
        if wear_level is not None:
            # Look the recorded value up under the "wear_level" key.  The
            # numeric value itself was used as the key before, so the
            # lookup never matched and the level was re-set on every call.
            if dev_data.get("wear_level") != str(wear_level):
                dev_data["wear_level"] = str(wear_level)
                self.log.debug(f"updating {devid} wear level to {wear_level}")
                self.set_device_wear_level(devid, wear_level)
        else:
            if "wear_level" in dev_data:
                del dev_data["wear_level"]
                self.log.debug(f"removing {devid} wear level")
                self.set_device_wear_level(devid, -1.0)
Exemple #14
0
    def load_pool_schedules(self, ioctx, schedules):
        """Load the schedules stored in the pool's ``SCHEDULE_OID`` object.

        The omap is read in pages of up to 128 entries.  Each key is
        resolved to a level spec and each value parsed as a schedule, which
        is stored in ``schedules`` under the omap key.  Keys that no longer
        resolve are treated as stale and removed from the omap afterwards.
        A missing object is not an error.

        :param ioctx: I/O context of the pool to load from
        :param schedules: dict filled in place with ``key -> Schedule``
        """
        pool_id = ioctx.get_pool_id()
        pool_name = ioctx.get_pool_name()
        stale_keys = []
        start_after = ''
        try:
            while True:
                with rados.ReadOpCtx() as read_op:
                    self.log.info("load_schedules: {}, start_after={}".format(
                        pool_name, start_after))
                    it, ret = ioctx.get_omap_vals(read_op, start_after, "",
                                                  128)
                    ioctx.operate_read_op(read_op, SCHEDULE_OID)

                    it = list(it)
                    for k, v in it:
                        # Remember the last key seen so the next page
                        # resumes after it.
                        start_after = k
                        v = v.decode()
                        self.log.info("load_schedule: {} {}".format(k, v))

                        try:
                            try:
                                level_spec = LevelSpec.from_id(self, k)
                            except ValueError:
                                # The message had a single placeholder for
                                # two arguments, so the pool name was
                                # silently dropped.
                                self.log.debug(
                                    "Stale schedule key {} in pool {}".format(
                                        k, pool_name))
                                stale_keys.append(k)
                                continue

                            schedule = Schedule.from_json(level_spec.name, v)
                            schedules[k] = schedule
                        except ValueError:
                            self.log.error(
                                "Failed to decode schedule: pool={}, {} {}".
                                format(pool_name, k, v))

                    if not it:
                        break

        except StopIteration:
            pass
        except rados.ObjectNotFound:
            # rbd_mirror_snapshot_schedule DNE
            pass

        if stale_keys:
            with rados.WriteOpCtx() as write_op:
                # Passed as a tuple, exactly as before.
                ioctx.remove_omap_keys(write_op, tuple(stale_keys))
                ioctx.operate_write_op(write_op, SCHEDULE_OID)
Exemple #15
0
def rbd_prefix_to_name(options):
    """Print the image name that belongs to an ``rbd_data.<id>`` prefix.

    The prefix is turned into an ``id_<id>`` key, which is looked up in the
    omap of the ``rbd_directory`` object of ``options.pool``.  The name is
    printed on success; otherwise an error is printed and the process
    exits with status -1.

    :param options: object with ``prefix`` (must start with ``rbd_data.``)
        and ``pool`` attributes
    :raises AssertionError: when the prefix does not start with
        ``rbd_data.`` or the omap request cannot be set up
    """
    assert (options.prefix.startswith('rbd_data.'))
    key = options.prefix.replace('rbd_data.', 'id_')
    pool = options.pool
    cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
    cluster.connect()
    # Release the cluster handle and the I/O context on every exit path,
    # including sys.exit() below.
    try:
        ioctx = cluster.open_ioctx(pool)
        try:
            with rados.ReadOpCtx(ioctx) as read_op:
                omap_iter, ret = ioctx.get_omap_vals_by_keys(read_op, (key, ))
                assert (ret == 0)
                ioctx.operate_read_op(read_op, "rbd_directory")
                try:
                    # Drop the first 4 bytes of the stored value --
                    # presumably a length header; confirm against the
                    # rbd_directory encoding.
                    name = list(omap_iter)[0][1][4:]
                except IndexError:
                    print('Error: %s not found in pool %s' % (key, pool))
                    sys.exit(-1)
                # Python 3 yields bytes; decode so the plain name is
                # printed.  On Python 2 the value is already a str.
                if not isinstance(name, str):
                    name = name.decode('utf-8')
                # Single-argument print() behaves the same on Python 2
                # and Python 3.
                print(name)
        finally:
            ioctx.close()
    finally:
        cluster.shutdown()
Exemple #16
0
 def _load_legacy_object(self, ioctx: rados.Ioctx, oid: str) -> bool:
     """Migrate the omap entries of a legacy object, then delete the object.

     Objects whose name does not match ``self.devre`` are left alone.  For
     a matching object, up to 10000 omap entries are handed to
     ``_legacy_put_device_metrics`` and the object is then removed.

     :param ioctx: I/O context holding the object
     :param oid: name of the object to migrate
     :return: ``False`` when ``oid`` does not match ``self.devre``,
         ``True`` once the object has been removed
     :raises AssertionError: when the number of entries read reaches the
         10000 limit
     """
     omap_limit = 10000
     self.log.debug(f"loading object {oid}")
     if re.search(self.devre, oid) is None:
         return False
     with rados.ReadOpCtx() as read_op:
         omap_iter, rc = ioctx.get_omap_vals(read_op, None, None, omap_limit)
         if rc == 0:
             ioctx.operate_read_op(read_op, oid)
             migrated = 0
             for migrated, (stamp, raw_smart) in enumerate(omap_iter, 1):
                 self.log.debug(f"putting {oid} {stamp}")
                 self._legacy_put_device_metrics(stamp, oid, raw_smart)
             # Reaching the limit would mean the read may have been
             # truncated.
             assert migrated < omap_limit
     self.log.debug(f"removing object {oid}")
     ioctx.remove_object(oid)
     return True
Exemple #17
0
def loopRados(ioctx):
    """Check or rebuild the ``rbd_directory`` entries of every image.

    Every object named ``rbd_id.<image>`` is opened as an RBD image to
    obtain its name and id.  With ``args.force`` set, an entry is appended
    through ``appendOmap`` for every field in ``antifield``; otherwise the
    matching ``<field>_<value>`` key is looked up in the ``rbd_directory``
    omap and the result passed to ``checkIter``.

    :param ioctx: I/O context of the pool to scan
    """
    for obj in ioctx.list_objects():
        if not obj.key.startswith("rbd_id."):
            continue
        # Close the image once its name and id are known instead of
        # leaving one open handle per image in the pool.
        with rbd.Image(ioctx, name=obj.key[7:]) as image:
            fields = {"name": image.get_name(), "id": image.id()}
        if args.force:
            for myfield in antifield.keys():
                print("Forcing {} entry for {}".format(
                    myfield, fields['name']))
                appendOmap(fields, myfield)
        else:
            with rados.ReadOpCtx() as read_op:
                for myfield in antifield.keys():
                    omap_iter, ret = ioctx.get_omap_vals_by_keys(
                        read_op,
                        tuple([
                            "{}_{}".format(myfield, fields[myfield]),
                        ]))
                    ioctx.operate_read_op(read_op, "rbd_directory")
                    checkIter(omap_iter, fields, myfield)
Exemple #18
0
    async def get_prefix(self, key_prefix: str) -> List[str]:
        """Get a range of keys with a prefix

        When an I/O context is available, up to 1000 matching entries are
        first copied from the ``kvstore`` omap into the local db; a failure
        of that step is logged and otherwise ignored.  The result is then
        built from the local db alone.

        :param key_prefix: prefix the keys must start with
        :return: the UTF-8 decoded values of all local db keys that start
            with ``key_prefix``, in the db's iteration order
        """
        # This is ugly, because pulling it out of omap is nice (there's a prefix
        # arg), but getting it from dbm means iterating through everything.
        # On the plus side, we're probably not talking about a lot of data...
        # But there's a perf optimization argument here for taking the values from
        # omap, setting them in the dbm, and returning the values from *omap* if
        # present, then only falling back to iterating dbm if we don't have the
        # values, rather than using the simpler logic in get().  But that might be
        # premature optimization, so possibly more straightforward to do the
        # simple implementation, then comment this for later use.
        values = []

        if self._ioctx:
            try:
                with rados.ReadOpCtx() as op:
                    omap_iter, ret = self._ioctx.get_omap_vals(
                        op,
                        start_after="",
                        filter_prefix=key_prefix,
                        max_return=1000,
                    )
                    assert ret == 0  # ???
                    # TODO: does this need to be async?
                    self._ioctx.operate_read_op(op, "kvstore")
                    for k, v in list(omap_iter):
                        self._db[k] = v

            except Exception as e:
                logger.exception(str(e))

        # The db keys are bytes; encode the prefix once instead of once
        # per key.
        prefix_bytes = key_prefix.encode("utf-8")

        # Note: firstkey/nextkey assumes the gdbm implementation,
        # but that seems pretty damn safe...
        k = self._db.firstkey()  # type: ignore
        while k is not None:
            if k.startswith(prefix_bytes):  # type: ignore
                values.append(self._db[k].decode("utf-8"))
            k = self._db.nextkey(k)  # type: ignore
        return values
Exemple #19
0
 def quota_get_value_by_key(key):
     """Return the omap entries of the quota object that match ``key``.

     Connects to the cluster described by ``_CLUSTER_CONFFILE``, opens the
     first pool whose name contains ``META_POOL_KEY`` and looks ``key`` up
     in the omap of ``QUOTA_OBJ_NAME``.

     :param key: omap key to look up
     :return: list of ``(key, value)`` omap entries found for ``key``
     :raises rados.ObjectNotFound: when the quota object does not exist
     :raises rados.TimedOut: when the read times out
     :raises Exception: any other failure; a message is printed before
         every re-raise
     """
     value_list = []
     try:
         with rados.Rados(conffile=_CLUSTER_CONFFILE) as cluster:
             pool_list = cluster.list_pools()
             meta_pool = [p for p in pool_list if META_POOL_KEY in p][0]
             with cluster.open_ioctx(meta_pool) as ioctx:
                 with rados.ReadOpCtx(ioctx) as read_op:
                     ret = ioctx.get_omap_vals_by_keys(read_op, (key,))
                     ioctx.operate_read_op(read_op, QUOTA_OBJ_NAME)
                     value_list = list(ret[0])
         return value_list
     except rados.ObjectNotFound:
         print('there is no quota object: {}'.format(QUOTA_OBJ_NAME))
         # Bare `raise`: no name is bound in this handler, so `raise e`
         # failed with UnboundLocalError and hid the real error.
         raise
     except rados.TimedOut:
         print('read omap timed out')
         raise
     except Exception as e:
         print('caught exception with message: {}'.format(e))
         raise
Exemple #20
0
def load_instances(ioctx):
    """Load the instance records stored in the omap of ``MIRROR_OBJECT_NAME``.

    Entries whose key starts with ``INSTANCE_ID_PREFIX`` are fetched in
    pages of up to ``MAX_RETURN`` and passed to ``handle_instance_load``,
    which fills the returned mapping.

    :param ioctx: I/O context to read from
    :return: the accumulated instance mapping
    :raises Exception: carrying a negated errno, when the omap request
        cannot be set up or a ``rados.Error`` occurs
    """
    instance_mapping = {}  # type: Dict[str, Dict]
    log.info('loading instances...')
    try:
        with rados.ReadOpCtx() as read_op:
            start = ""
            while True:
                omap_iter, ret = ioctx.get_omap_vals(
                    read_op, start, INSTANCE_ID_PREFIX, MAX_RETURN)
                if ret != 0:
                    log.error('failed to fetch instance omap')
                    raise Exception(-errno.EINVAL)
                ioctx.operate_read_op(read_op, MIRROR_OBJECT_NAME)
                page = dict(omap_iter)
                if not page:
                    break
                handle_instance_load(instance_mapping, page)
                # Resume the next page after the last key of this one.
                start, _ = page.popitem()
        log.info("loaded {0} instance(s) from disk".format(
            len(instance_mapping)))
        return instance_mapping
    except rados.Error as e:
        log.error(f'exception when loading instances: {e}')
        raise Exception(-e.errno)
Exemple #21
0
            omap_key_name = '%s-%09d' % (key_prefix, (omap_kvpairs_per_call - k) + base_key)
            if omap_value_size > 0:
              v = omap_key_name
              while len(v) < omap_value_size: v = v + '.' + v
              value = v[:omap_value_size]
            # syntax weirdometer alert
            ioctx.set_omap(op, (omap_key_name,), (value,))
          ioctx.operate_write_op(op, per_thread_obj_name)
        base_key += omap_kvpairs_per_call
        check_measurement_over(call_start_time, omap_time_estimator)
        #if measurement_over: break

    elif optype == 'omap-read':
      ioctx.read(per_thread_obj_name)
      keycount = 0
      with rados.ReadOpCtx() as op:
        last_key=''
        while True:
          iter, ret = ioctx.get_omap_vals(op, last_key, "", omap_kvpairs_per_call)
          assert(ret == 0)
          ioctx.operate_read_op(op, per_thread_obj_name)
          pairs_in_iter = 0
          for (k,v) in list(iter):
            call_start_time = time.time()
            # count omap keys as objects for throughput calculation
            keycount += 1
            if debug: print('%s, %s' % (k, str(v)))
            if k < last_key:
              print('ERROR: key %s < last key %s' % (k, last_key))
            last_key = k
            pairs_in_iter += 1
Exemple #22
0
                # syntax weirdometer alert
                ioctx.set_omap(op, (omap_key_name, ), (value, ))
            ioctx.operate_write_op(op, obj_name)
            if think_time > 0.0:
                time.sleep(think_time)
            base_key += keys_per_call

            # we read the entire omap when it reaches 4^k in size
            # this means amortized cost of omap read should be O(N)

            if direction == 'writeread' and base_key > next_power_of_4:
                if debug: print('next_power_of_4: %d' % next_power_of_4)
                next_power_of_4 *= 4
                read_start_time = time.time()
                read_keycount = 0
                with rados.ReadOpCtx() as read_op:
                    read_omap, ret = ioctx.get_omap_vals(read_op, "", "", -1)
                    assert (ret == 0)
                    ioctx.operate_read_op(read_op, obj_name)
                    read_keycount = 0
                    for (k, v) in read_omap:
                        read_keycount += 1
                        if debug: print(k)
                    read_end_time = time.time()
                    print('read keycount = %d' % read_keycount)
                    read_delta_time = read_end_time - read_start_time
                    print('elapsed read time: %f' % read_delta_time)
                    read_throughput = read_keycount / read_delta_time
                    print('read throughput = %f' % read_throughput)
                    sys.stdout.flush()
else: