Example #1
def compact_nodelist(node_string):
    """Convert a string of nodes into compact representation.

    Wraps ``scontrol show hostlist nid05032,nid05033,...`` to compress a list of
    nodes to a Slurm nodelist string.  This is effectively the reverse of
    ``expand_nodelist()``.

    Args:
        node_string (str): Comma-separated list of node names (e.g.,
            ``nid05032,nid05033,...``)
    Returns:
        str: The compact representation of `node_string` (e.g.,
        ``nid0[5032-5159]``)
    """
    if not isstr(node_string):
        # accept any iterable of node names, not just a comma-separated string
        node_string = ','.join(list(node_string))

    try:
        node_string = subprocess.check_output([SCONTROL, 'show', 'hostlist', node_string]).strip()
    except OSError as error:
        if error.errno == errno.ENOENT:
            # "No such file or directory": scontrol is not installed, so
            # return the input unmodified
            pass
        else:
            raise

    if isstr(node_string):
        return node_string
    else:
        return node_string.decode()
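
All of these examples lean on an ``isstr()`` helper that answers "is this a string?" on both Python 2 and Python 3. Its definition is not shown on this page, so the following is a minimal sketch of what such a helper plausibly looks like, not necessarily pytokio's implementation:

def isstr(obj):
    """Return True if obj is a string on either Python 2 or Python 3."""
    try:
        return isinstance(obj, basestring)  # Python 2: str or unicode
    except NameError:
        return isinstance(obj, str)         # Python 3: basestring no longer exists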
Example #2
    def _recast_keys(self, *target_keys):
        """Convert own keys into native Python objects.

        Scan self and convert special keys into native Python objects where
        appropriate.  If no keys are given, scan everything.  Values that are
        not strings are never recast; this avoids calling expand_nodelist on a
        key that has already been recast, since expand_nodelist does not work
        outside of an environment containing Slurm.

        Args:
            *target_keys (list, optional): Only convert these keys into native
                Python object types.  If omitted, convert all keys.
        """
        scan_keys = len(target_keys)
        for counters in self.values():
            # if specific keys were passed, only look for those keys
            if scan_keys > 0:
                for key in target_keys:
                    value = counters[key]
                    if key in _RECAST_KEY_MAP and isstr(value):
                        counters[key] = _RECAST_KEY_MAP[key][0](value)
            # otherwise, attempt to recast every key
            else:
                for key, value in counters.items():
                    if key in _RECAST_KEY_MAP and isstr(value):
                        counters[key] = _RECAST_KEY_MAP[key][0](value)
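
_recast_keys() assumes a module-level ``_RECAST_KEY_MAP`` whose values pair a decode function with its inverse. The sketch below shows the shape implied by the ``[0]`` indexing above; the real map's keys and functions may differ:

import datetime

# Hypothetical _RECAST_KEY_MAP: value[0] parses a raw string into a native
# Python object, value[1] serializes it back.  expand_nodelist and
# compact_nodelist are the functions from Examples #4 and #1.
_RECAST_KEY_MAP = {
    'nodelist': (expand_nodelist, compact_nodelist),
    'start': (lambda s: datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%S"), str),
}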
Example #3
    def _load_subprocess(self, *args):
        """Run a subprocess and pass its stdout to a self-initializing parser
        """

        cmd = self.subprocess_cmd
        if args:
            cmd += args

        try:
            if self.silent_errors:
                with open(os.devnull, 'w') as devnull:
                    output_str = subprocess.check_output(cmd, stderr=devnull)
            else:
                output_str = subprocess.check_output(cmd)
        except subprocess.CalledProcessError as error:
            warnings.warn("%s returned nonzero exit code (%d)" %
                          (cmd, error.returncode))
            output_str = error.output
        except OSError as error:
            if error.errno == errno.ENOENT:
                raise type(error)(error.errno, "%s command not found" %
                                  self.subprocess_cmd[0])
            raise

        if isstr(output_str):
            # Python 2 - subprocess.check_output returns a string
            self.load_str(output_str)
        else:
            # Python 3 - subprocess.check_output returns encoded bytes
            self.load_str(output_str.decode())
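
The str-versus-bytes handling at the end of this method recurs throughout these examples. A self-contained sketch distilling just that pattern (the helper name is mine, not pytokio's):

import subprocess
import warnings

def run_and_decode(cmd):
    """Run cmd and always return text, whether check_output produced
    str (Python 2) or bytes (Python 3)."""
    try:
        output = subprocess.check_output(cmd)
    except subprocess.CalledProcessError as error:
        warnings.warn("%s returned nonzero exit code (%d)" % (cmd, error.returncode))
        output = error.output
    return output if isinstance(output, str) else output.decode()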
Example #4
def expand_nodelist(node_string):
    """Expand Slurm compact nodelist into a set of nodes.

    Wraps ``scontrol show hostname nid0[5032-5159]`` to expand a Slurm nodelist
    string into a list of nodes.

    Args:
        node_string (str): Node list in Slurm's compact notation (e.g.,
            ``nid0[5032-5159]``)

    Returns:
        set: Set of strings which encode the fully expanded node names contained
        in `node_string`.
    """
    node_names = set([])
    try:
        output_str = subprocess.check_output([SCONTROL, 'show', 'hostname', node_string])
    except OSError as error:
        if error.errno == errno.ENOENT:
            raise type(error)(error.errno, "Slurm CLI (%s command) not found" % SCONTROL)
        raise

    if not isstr(output_str):
        output_str = output_str.decode() # for python3

    for line in output_str.splitlines():
        node_name = line.strip()
        if node_name:
            node_names.add(node_name)

    return node_names
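
A round-trip usage sketch combining this function with compact_nodelist() from Example #1; it only runs on a host where Slurm's ``scontrol`` is installed:

# nodes = expand_nodelist('nid0[5032-5035]')
# sorted(nodes)
# # -> ['nid05032', 'nid05033', 'nid05034', 'nid05035']
# compact_nodelist(','.join(sorted(nodes)))
# # -> 'nid0[5032-5035]'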
Example #5
    def __repr__(self):
        """Serialize self in a format compatible with ``xtdb2proc``.

        Returns the object in the same format as the xtdb2proc output so that
        this object can be circularly serialized and deserialized.

        Returns:
            str: String representation of the processor mapping table in a
            format compatible with the output of ``xtdb2proc``.
        """
        repr_result = ""
        for _, record in self.items():
            line = []
            for key in self.key_order:
                try:
                    val = record[key]
                except KeyError:
                    sys.stderr.write("key does not appear in all records\n")
                    raise

                # Look at the type of each val and append the corresponding string
                if val is None:
                    line.append("%s=null" % key)
                elif isstr(val):
                    line.append("%s='%s'" % (key, val))
                else:
                    line.append("%s=%s" % (key, val))

            repr_result += ','.join(line) + "\n"
        return repr_result
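
Because ``__repr__`` emits the same format ``xtdb2proc`` produces, its output can be fed back through the parser. A usage sketch, assuming the enclosing class accepts a ``from_string`` keyword like the other connectors shown here (the class name is an assumption):

# table = CraySdbProc(from_string=open('xtdb2proc.out').read())
# clone = CraySdbProc(from_string=repr(table))
# assert repr(clone) == repr(table)   # circular (de)serialization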
Example #6
    def archive_ost_data(self, lmtdb):
        """Extract and encode data from LMT's OST_DATA table

        Queries the LMT database, interprets resulting rows, and populates a
        dictionary of TimeSeries objects with those values.

        Args:
            lmtdb (LmtDb): database object
        """

        dataset_names = [
            'datatargets/readbytes', 'datatargets/writebytes',
            'fullness/bytes', 'fullness/bytestotal', 'fullness/inodes',
            'fullness/inodestotal'
        ]

        self.init_datasets(dataset_names, lmtdb.ost_names)

        # Now query the OST_DATA table to get byte counts over the query time range
        results, columns = lmtdb.get_ost_data(self.query_start,
                                              self.query_end_plusplus)

        # Index the columns to speed up insertion of data
        col_map = {}
        try:
            for db_col in [
                    'TIMESTAMP', 'OST_ID', 'READ_BYTES', 'WRITE_BYTES',
                    'KBYTES_USED', 'KBYTES_FREE', 'INODES_USED', 'INODES_FREE'
            ]:
                col_map[db_col] = columns.index(db_col)
        except ValueError:
            raise ValueError("LMT database schema does not match expectation")

        # Loop through all the results of the timeseries query
        for row in results:
            if isstr(row[col_map['TIMESTAMP']]):
                # SQLite stores timestamps as a unicode string
                timestamp = datetime.datetime.strptime(
                    row[col_map['TIMESTAMP']], "%Y-%m-%d %H:%M:%S")
            else:
                # MySQL timestamps are automatically converted to datetime.datetime
                timestamp = row[col_map['TIMESTAMP']]
            target_name = lmtdb.ost_id_map[row[col_map['OST_ID']]]
            for dataset_name in dataset_names:
                target_dbcol = self.config[dataset_name].get('column')
                if target_dbcol is not None:
                    self[dataset_name].insert_element(
                        timestamp, target_name, row[col_map[target_dbcol]])
                elif dataset_name == 'fullness/bytestotal':
                    self[dataset_name].insert_element(
                        timestamp, target_name, row[col_map['KBYTES_USED']] +
                        row[col_map['KBYTES_FREE']])
                elif dataset_name == 'fullness/inodestotal':
                    self[dataset_name].insert_element(
                        timestamp, target_name, row[col_map['INODES_USED']] +
                        row[col_map['INODES_FREE']])
                else:
                    errmsg = "%s in self.config but missing 'column' setting" % dataset_name
                    raise KeyError(errmsg)
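
The SQLite-versus-MySQL timestamp branch above reappears verbatim in Examples #8 and #14; a small helper capturing the shared logic (my extraction, not part of the original module):

import datetime

def _to_datetime(raw):
    """Return a datetime whether the database driver yielded a string
    (SQLite) or a datetime.datetime (MySQL)."""
    if isstr(raw):
        return datetime.datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")
    return raw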
Example #7
def init_config():
    """Loads the global configuration.

    Loads the site-wide configuration file, then inspects relevant environment
    variables for overrides.
    """
    global CONFIG
    global PYTOKIO_CONFIG_FILE
    global DEFAULT_CONFIG_FILE

    # Escape hatch for cases where we want to load the module without initializing it
    if os.environ.get('PYTOKIO_SKIP_CONFIG') is not None:
        return

    # Set the default config path - set here rather than in the global namespace
    # so site-specific paths don't get baked into the autodoc documentation
    DEFAULT_CONFIG_FILE = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), 'site.json')

    # Load a pytokio config from a special location
    PYTOKIO_CONFIG_FILE = os.environ.get('PYTOKIO_CONFIG', DEFAULT_CONFIG_FILE)

    try:
        with open(PYTOKIO_CONFIG_FILE, 'rt') as config_file:
            if HAVE_YAML:
                loaded_config = load_and_expand_yaml(config_file)
            else:
                loaded_config = json.load(config_file)
    except (OSError, IOError):
        loaded_config = {}

    # Load pytokio config file and convert its keys into a set of constants
    for _key, _value in loaded_config.items():
        # config keys beginning with an underscore get skipped
        if _key.startswith('_'):
            continue

        # if setting this key will overwrite something already in the tokio.config
        # namespace, only overwrite if the existing object is something we probably
        # loaded from a json
        _old_attribute = CONFIG.get(_key.lower())
        if _old_attribute is None \
        or isstr(_old_attribute) \
        or isinstance(_old_attribute, (dict, list)):
            CONFIG[_key.lower()] = _value

    # Check for magic environment variables to override the contents of the config
    # file at runtime
    for _magic_variable in MAGIC_VARIABLES:
        _magic_value = os.environ.get("PYTOKIO_" + _magic_variable)
        if _magic_value is not None:
            try:
                _magic_value_decoded = json.loads(_magic_value)
            except ValueError:
                CONFIG[_magic_variable.lower()] = _magic_value
            else:
                CONFIG[_magic_variable.lower()] = _magic_value_decoded
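
A usage sketch of the environment-variable override path; treating ``HDF5_FILES`` as a member of ``MAGIC_VARIABLES`` is an assumption here:

# os.environ['PYTOKIO_HDF5_FILES'] = '["/path/a.hdf5", "/path/b.hdf5"]'
# init_config()
# CONFIG['hdf5_files']   # -> ['/path/a.hdf5', '/path/b.hdf5'] (valid JSON is decoded)
# os.environ['PYTOKIO_HDF5_FILES'] = 'not json'
# init_config()
# CONFIG['hdf5_files']   # -> 'not json' (kept as a plain string)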
Example #8
    def archive_mds_data(self, lmtdb):
        """Extract and encode data from LMT's MDS_DATA table

        Queries the LMT database, interprets resulting rows, and populates a
        dictionary of TimeSeries objects with those values.

        Args:
            lmtdb (LmtDb): database object
        """

        dataset_names = [
            'mdservers/cpuload',
        ]

        self.init_datasets(dataset_names, lmtdb.mds_names)

        # Now query the MDS_DATA table to get byte counts over the query time range
        results, columns = lmtdb.get_mds_data(self.query_start,
                                              self.query_end_plusplus)

        # Index the columns to speed up insertion of data
        col_map = {}
        try:
            for db_col in ['TIMESTAMP', 'MDS_ID', 'PCT_CPU']:
                col_map[db_col] = columns.index(db_col)
        except ValueError:
            raise ValueError("LMT database schema does not match expectation")

        # Loop through all the results of the timeseries query
        for row in results:
            if isstr(row[col_map['TIMESTAMP']]):
                # SQLite stores timestamps as a unicode string
                timestamp = datetime.datetime.strptime(
                    row[col_map['TIMESTAMP']], "%Y-%m-%d %H:%M:%S")
            else:
                # MySQL timestamps are automatically converted to datetime.datetime
                timestamp = row[col_map['TIMESTAMP']]
            target_name = lmtdb.mds_id_map[row[col_map['MDS_ID']]]
            for dataset_name in dataset_names:
                target_dbcol = self.config[dataset_name].get('column')
                # e.g., target_dbcol='PCT_CPU', target_name='snx11025n022'
                if target_dbcol is not None:
                    self[dataset_name].insert_element(
                        timestamp, target_name, row[col_map[target_dbcol]])
                else:
                    errmsg = "%s in self.config but missing 'column' setting" % dataset_name
                    raise KeyError(errmsg)
Example #9
def reduce_diff(diff_dict):
    """
    Take the raw output of .diff() and aggregate the results of each device
    """
    reduced = {
        'sum': {},
        'min': {},
        'max': {},
        'count': {},
    }
    for counters in diff_dict['devices'].values():
        for counter, value in counters.items():
            if counter not in reduced['count']:
                reduced['count'][counter] = 1
                new = True
            else:
                reduced['count'][counter] += 1
                new = False

            if not isstr(value):
                if new:
                    reduced['sum'][counter] = value
                    reduced['min'][counter] = value
                    reduced['max'][counter] = value
                else:
                    reduced['sum'][counter] += value
                    reduced['min'][counter] = min(value,
                                                  reduced['min'][counter])
                    reduced['max'][counter] = max(value,
                                                  reduced['max'][counter])

    result = {}
    for reduction, counters in reduced.items():
        for counter, value in counters.items():
            reduced_key = "%s_%s" % (reduction, counter)
            result[reduced_key] = value
            if reduction == 'sum':
                reduced_key = "%s_%s" % ('ave', counter)
                result[reduced_key] = float(value) / reduced['count'][counter]

    return result
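
A toy run showing how string-valued counters are counted but excluded from the numeric aggregates:

diff_dict = {'devices': {
    'SN0001': {'write_bytes': 100, 'temperature': '35 C'},
    'SN0002': {'write_bytes': 300, 'temperature': '40 C'},
}}
# reduce_diff(diff_dict) returns:
# {'count_write_bytes': 2, 'count_temperature': 2,
#  'sum_write_bytes': 400, 'ave_write_bytes': 200.0,
#  'min_write_bytes': 100, 'max_write_bytes': 300}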
Example #10
def jobs_running_between(start, end, keys=None):
    """Generate a list of Slurm jobs that ran between a time range

    Args:
        start (datetime.datetime): Find jobs that ended at or after this time
        end (datetime.datetime): Find jobs that started at or before this time
        keys (list): List of Slurm fields to return for each running job

    Returns:
        tokio.connectors.slurm.Slurm: Slurm object containing jobs whose
        runtime overlapped with the start and end times
    """
    args = [
        '--parsable',
        '--starttime', start.strftime("%Y-%m-%dT%H:%M:%S"),
        '--endtime', end.strftime("%Y-%m-%dT%H:%M:%S"),
        '--state', 'R',
        '--allusers',
    ]

    if keys is None:
        args += ['--format', ','.join(DEFAULT_KEYS)]
    else:
        args += ['--format', ','.join(keys)]

    try:
        output_str = subprocess.check_output([SACCT] + args)
        if not isstr(output_str):
            output_str = output_str.decode() # for python3
    except OSError as error:
        if error.errno == errno.ENOENT:
            raise type(error)(error.errno, "Slurm CLI (%s command) not found" % SACCT)
        raise

    return Slurm(from_string=output_str)
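
Usage sketch (requires ``sacct`` on the path):

# import datetime
# start = datetime.datetime(2019, 1, 1)
# jobs = jobs_running_between(start, start + datetime.timedelta(hours=1))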
Example #11
def parse_counters_fileobj(fileobj, nodename=None):
    """Convert any output of ISDCT into key-value pairs.

    Reads the output of a file-like object which contains the output of a
    single isdct command.  Understands the output of the following options:

      * ``isdct show -smart`` (SMART attributes)
      * ``isdct show -sensor`` (device health sensors)
      * ``isdct show -performance`` (device performance metrics)
      * ``isdct show -a`` (drive info)

    Args:
        fileobj (file): file-like object containing the output of an ISDCT
            command
        nodename (str): name of node corresponding to `fileobj`, if known

    Returns:
        dict: dict of dicts keyed by the device serial number.
    """

    data = {}
    device_sn = None
    parse_mode = 0  # =0 for regular counters, 1 for SMART data
    smart_buffer = {}
    for line in fileobj.readlines():
        if not isstr(line):
            line = line.decode().strip()
        else:
            line = line.strip()

        if device_sn is None:
            rex_match = re.search(REX_SERIAL_NO, line)
            if rex_match is not None:
                device_sn = rex_match.group(2)
                if nodename is not None:
                    data['node_name'] = nodename
                if rex_match.group(1) == "Intel SSD" \
                or rex_match.group(1) == "SMART and Health Information":
                    parse_mode = 0
                elif rex_match.group(1) == "SMART Attributes":
                    parse_mode = 1
                else:
                    raise Exception("Unknown counter file format")
        elif parse_mode == 0 and ':' in line:
            key, val = line.split(':', 1)
            key = _normalize_key(key)
            data[key] = val.strip()
        elif parse_mode > 0 and ':' in line:
            key, val = line.split(':', 1)
            key = _normalize_key(key)
            smart_buffer[key] = val.strip()
        elif parse_mode > 0 and line.startswith('-') and line.endswith('-'):
            for key, val in _rekey_smart_buffer(smart_buffer).items():
                key = _normalize_key(key)
                data[key] = val
            smart_buffer = {'_id': line.split()[1]}
    if parse_mode > 0:  # flush the last SMART register
        for key, val in _rekey_smart_buffer(smart_buffer).items():
            key = _normalize_key(key)
            data[key] = val

    if device_sn is None:
        warnings.warn("Could not find device serial number in %s" %
                      fileobj.name)
        return {}

    return {device_sn: data}
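
Since the parser only needs a file-like object, it can be exercised with an in-memory buffer. The serial-number line below is purely illustrative; real input must match ``REX_SERIAL_NO``, which is not shown here:

# import io
# fake = io.StringIO("- Intel SSD ... CVFT000000000000 -\n"
#                    "Temperature : 35\n")
# parse_counters_fileobj(fake, nodename='nid00001')
# # -> roughly {'CVFT000000000000': {'node_name': 'nid00001', 'temperature': '35'}}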
Example #12
def _merge_parsed_counters(parsed_counters_list):
    """Merge ISDCT outputs into a single object.

    Aggregates counters from each record based on the NVMe device serial number,
    with redundant counters being overwritten.

    Args:
        parsed_counters_list (list): List of parsed ISDCT outputs as dicts.
            Each list element is a dict with a single key (a device serial
            number) and one or more values; each value is itself a dict of
            key-value pairs corresponding to ISDCT/SMART counters from that
            device.

    Returns:
        dict: Dict with keys given by all device serial numbers found in
        `parsed_counters_list` and whose values are a dict containing keys
        and values representing all unique keys across all elements of
        `parsed_counters_list`.
    """
    all_data = {}
    for parsed_counters in parsed_counters_list:
        if not parsed_counters:
            continue
        elif len(parsed_counters) > 1:
            raise Exception(
                "Received multiple serial numbers from parse_dct_counters_file"
            )
        else:
            device_sn = next(iter(parsed_counters))

        ### merge file's counter dict with any previous counters we've parsed
        if device_sn not in all_data:
            all_data[device_sn] = parsed_counters[device_sn]
        else:
            all_data[device_sn].update(parsed_counters[device_sn])

    ### attempt to figure out the type of each counter
    new_counters = []
    for device_sn, counters in all_data.items():
        for counter, value in counters.items():
            ### first, handle counters that do not have an obvious way to cast
            if counter in ("temperature", "throttle_status",
                           "endurance_analyzer"):
                tokens = value.split(None, 1)
                if len(tokens) == 2:
                    new_value, unit = tokens
                else:
                    new_value, unit = tokens[0], None
            else:
                new_value = value
                unit = None

            new_value = recast_string(new_value)

            ### endurance_analyzer can be numeric or an error string
            if counter == 'endurance_analyzer':
                if isstr(new_value):
                    new_value = None
                    unit = None
                elif unit is None:
                    # Intel reports this counter multiple times using different print formats
                    unit = 'years'

            ### Insert the key-value pair and its unit of measurement (if available)
            all_data[device_sn][counter] = new_value
            if unit is not None:
                new_counters.append((device_sn, '%s_unit' % counter, unit))

    for (device_sn, counter, value) in new_counters:
        all_data[device_sn][counter] = value

    return all_data
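
A toy merge showing the overwrite semantics for a drive that appears in two records (counter names and values are invented):

# merged = _merge_parsed_counters([
#     {'SN0001': {'write_bytes': '100'}},
#     {'SN0001': {'write_bytes': '150', 'temperature': '35 C'}},
# ])
# # -> {'SN0001': {'write_bytes': 150, 'temperature': 35, 'temperature_unit': 'C'}}
# # assuming recast_string() turns numeric strings into ints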
Example #13
    def diff(self, old_isdct, report_zeros=True):
        """Highlight differences between self and another NerscIsdct.

        Subtract each counter for each serial number in this object from its
        counterpart in ``old_isdct``.  Return the changes in each numeric counter
        and any serial numbers that have appeared or disappeared.

        Args:
            old_isdct (NerscIsdct): object with which we should be compared
            report_zeros (bool): If True, report all counters even if they
                showed no change.  Default is True.

        Returns:
            dict: Dictionary containing the following keys:

            * `added_devices` - device serial numbers which exist in self
              but not old_isdct
            * `removed_devices` - device serial numbers which do not exist
              in self but do in old_isdct
            * `devices` - dict keyed by device serial numbers and whose
              values are dicts of keys whose values are the difference
              between old_isdct and self
        """
        result = {
            'added_devices': [],
            'removed_devices': [],
            'devices': {},
        }
        existing_devices = set([])
        for serial_no, counters in self.items():
            existing_devices.add(serial_no)
            # new devices that are appearing for the first time
            if serial_no not in old_isdct:
                result['added_devices'].append(serial_no)
                continue

            # calculate the diff in each counter for this device
            diff_dict = {}
            for counter, value in counters.items():
                if counter not in old_isdct[serial_no]:
                    warnings.warn("Counter %s does not exist in old_isdct" %
                                  counter)

                # just highlight different strings, but ignore
                # endurance_analyzer (which can be numeric or string-like)
                elif isstr(value) and counter != "endurance_analyzer":
                    if old_isdct[serial_no][counter] != value:
                        diff_value = "+++%s ---%s" % (
                            value, old_isdct[serial_no][counter])
                    else:
                        diff_value = ""
                    if report_zeros or diff_value != "":
                        diff_dict[counter] = diff_value

                # subtract numeric counters
                else:
                    try:
                        diff_value = value - old_isdct[serial_no][counter]
                    except TypeError:
                        ### endurance_analyzer can be either numeric (common
                        ### case) or an error string (if the drive is brand
                        ### new); just drop the counter in the non-numeric
                        ### case
                        if counter == 'endurance_analyzer':
                            continue
                        error = "incompatible numeric types for %s/%s: [%s] vs [%s]" % (
                            serial_no, counter, old_isdct[serial_no][counter],
                            value)
                        raise TypeError(error)
                    if report_zeros or diff_value != 0:
                        diff_dict[counter] = diff_value

            result['devices'][serial_no] = diff_dict

        # Look for serial numbers that used to exist but do not appear in self
        for serial_no in old_isdct:
            if serial_no not in existing_devices:
                result['removed_devices'].append(serial_no)

        return result
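
``diff()`` composes naturally with ``reduce_diff()`` from Example #9 (the counter name here is illustrative):

# diff_dict = isdct_now.diff(isdct_then, report_zeros=False)
# summary = reduce_diff(diff_dict)
# summary.get('sum_write_bytes')   # total change across all drives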
Example #14
    def archive_mds_ops_data(self, lmtdb):
        """Extract and encode data from LMT's MDS_OPS_DATA table

        Queries the LMT database, interprets resulting rows, and populates a
        dictionary of TimeSeries objects with those values.  Avoids JOINing the
        MDS_VARIABLE_INFO table and instead uses an internal mapping of
        OPERATION_IDs to demultiplex the data in MDS_OPS_DATA into different
        HDF5 datasets.

        Args:
            lmtdb (LmtDb): database object
        """

        # mapping between OPERATION_INFO.OPERATION_NAME to HDF5 dataset names
        opname_to_dataset_name = {
            'open': 'mdtargets/opens',
            'close': 'mdtargets/closes',
            'mknod': 'mdtargets/mknods',
            'link': 'mdtargets/links',
            'unlink': 'mdtargets/unlinks',
            'mkdir': 'mdtargets/mkdirs',
            'rmdir': 'mdtargets/rmdirs',
            'rename': 'mdtargets/renames',
            'getxattr': 'mdtargets/getxattrs',
            'statfs': 'mdtargets/statfss',
            'setattr': 'mdtargets/setattrs',
            'getattr': 'mdtargets/getattrs',
        }
        dataset_names = list(opname_to_dataset_name.values())

        self.init_datasets(dataset_names, lmtdb.mds_names)

        results, columns = lmtdb.get_mds_ops_data(self.query_start,
                                                  self.query_end_plusplus)

        # Index the columns to speed up insertion of data
        col_map = {}
        try:
            for db_col in ['TIMESTAMP', 'MDS_ID', 'OPERATION_ID', 'SAMPLES']:
                col_map[db_col] = columns.index(db_col)
        except ValueError:
            raise ValueError("LMT database schema does not match expectation")

        # Loop through all the results of the timeseries query
        for row in results:
            if isstr(row[col_map['TIMESTAMP']]):
                # SQLite stores timestamps as a unicode string
                timestamp = datetime.datetime.strptime(
                    row[col_map['TIMESTAMP']], "%Y-%m-%d %H:%M:%S")
            else:
                # MySQL timestamps are automatically converted to datetime.datetime
                timestamp = row[col_map['TIMESTAMP']]

            # figure out the dataset this row's data will go into (this
            # implicitly filters out operations that aren't defined in
            # opname_to_dataset_name)
            op_name = lmtdb.mds_op_id_map[row[col_map['OPERATION_ID']]]
            dataset_name = opname_to_dataset_name.get(op_name)
            if dataset_name is None:
                continue

            # figure out which column (MDS name) this row's data will go into
            mds_name = lmtdb.mds_id_map.get(row[col_map['MDS_ID']])
            if not mds_name:
                errmsg = "unknown MDS_ID %s" % row[col_map['MDS_ID']]
                warnings.warn(errmsg)
                continue

            self[dataset_name].insert_element(timestamp, mds_name,
                                              row[col_map['SAMPLES']])