Example #1
    def html_addtable(self, table_data, table_title=None):
        """
        Convert DataFrame into an HTML table and add it to self.html_page
        - table_data: (DataFrame) source data for the HTML table
        - table_title: (string) title for the table in the HTML document
        """
        # Check provided data table
        try:
            self.check_df(table_data)
        except TypeError as err:
            error_exit(self.log, err)

        # Convert DataFrame to HTML
        table_page = self.html_dataframe(table_data)

        # Add CSS to HTML document
        table_css = table_page.head.style
        self.html_page.head.append(table_css)

        # Add table to HTML document
        table_container = self.html_page.find(id='tablescontainer')

        table_block = table_page.new_tag('div')
        table_block['class'] = 'tablecard'

        if table_title:
            table_block.append(table_page.new_tag('h2'))
            table_block.h2.string = table_title

        table_block.append(table_page.body.table)
        table_container.append(table_block)
        self.log.info("HTML page: data table added to report page")
Example #2
    def locate_config(self, configfile):
        """
        Determine location of config file, create it if it does not exist
        - configfile: (string) name of the config file
        """
        if os.path.isabs(configfile):
            # Use config file from absolute path
            self.usercfg = {
                'name': os.path.basename(configfile),
                'path': configfile,
            }
        elif configfile:
            # Locate config file and install it if necessary
            self.usercfg = {'name': configfile}

            # Check existence of config file in default directories
            tentative_configs = [os.path.join(confdir, self.usercfg['name']) for confdir in self.default_config_dirs]
            existing_configs = [config_path for config_path in tentative_configs if os.path.isfile(config_path)]

            if len(existing_configs) > 0:
                # Use config file from top hit
                self.usercfg.update({'path': existing_configs[0]})
                self.log.debug("Found existing configuration file: %s", self.usercfg['path'])
            else:
                # Install default config file in user's dir if config file is not found
                self.usercfg.update({'path': os.path.join(appdirs.user_config_dir(CONFIG_DIR), self.usercfg['name'])})
                self.copy_pkgdefault()
        else:
            error_exit(self.log, "Name of configuration file is needed")
def find_available_path(filepath):
    """
    Check if the given path exists. If it does, generate a sensible variant that does not exist.
    Make parent folders if necessary.
    - filepath: (string) absolute path to an existing or non-existing file
    """
    try:
        check_abspath(filepath)
    except ValueError as err:
        error_exit(logger, err)

    # Make parent directories as needed
    parent_dir = os.path.dirname(filepath)
    make_dir(parent_dir)

    # Look for a file name that does not exist
    # Make variants of file name until we find one available
    replica = 0
    tentative_path = filepath
    while os.path.lexists(tentative_path):
        replica += 1
        if replica < 10000:
            # Prepend replica number to extension
            tentative_pathcut = filepath.split('.')
            tentative_pathcut.insert(-1, f"{replica:04}")
            tentative_path = '.'.join(tentative_pathcut)
        else:
            errmsg = f"Reached maximum number of replicas. Cannot find an available file name in path: {filepath}"
            raise FileExistsError(errmsg)

    logger.debug("Found available file name at path: %s", tentative_path)
    return tentative_path
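The replica-numbering scheme above inserts a zero-padded counter before the file extension. A minimal standalone sketch of that naming step (the path is illustrative):

    filepath = "/data/report.csv"   # hypothetical colliding path
    variant = filepath.split('.')
    variant.insert(-1, f"{1:04}")   # first replica number, zero-padded to 4 digits
    print('.'.join(variant))        # -> /data/report.0001.csv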
def copy_file(source, destination, force=False):
    """
    Copy file from source to destination if file in destination is missing
    Returns success of the copy operation
    - source: (string) absolute path to source file
    - destination: (string) absolute path to destination file
    - force: (boolean) copy file regardless of existence of destination
    """
    for filepath in [source, destination]:
        try:
            check_abspath(filepath)
        except ValueError as err:
            error_exit(logger, err)

    # Copy files if destination does not exist or force is enabled
    if not os.path.exists(destination) or force:
        try:
            shutil.copyfile(source, destination)
        except FileNotFoundError:
            logger.warning("Copy failed due to missing file: %s", source)
            return False
        except PermissionError:
            error_exit(
                logger,
                f"Permission denied to copy file '{source}' to '{destination}'"
            )
        else:
            logger.debug("File '%s' succesfully copied to '%s'", source,
                         destination)
            return True
    else:
        logger.debug("Nothing to copy, file already exists: %s", destination)
        return None
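A hedged usage sketch of copy_file, showing how the return value distinguishes the three outcomes (paths are illustrative and the module-level logger is assumed to be configured):

    copied = copy_file('/tmp/source.txt', '/tmp/destination.txt')
    if copied is True:
        print("file copied")
    elif copied is False:
        print("source file was missing")
    else:
        print("destination already exists, nothing copied")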
Example #5
    def set_output_paths(self, savedir=None):
        """
        Set output directory and define the path of output files for this object
        Paths will be based on the object ID, which will be modified as necessary to avoid filename collisions
        WARNING: do not set these paths too much in advance of any write operation
        - savedir: (string) path to directory to save output data
        """
        # Set output directory
        if savedir is not None and check_dir(savedir):
            self.savedir = savedir
        else:
            self.savedir = os.getcwd()

        # Use ID to set default paths for each output file
        self.output_path = dict()
        output_exts = ['html', 'pdf', 'png', 'svg', 'csv']
        for ext in output_exts:
            try:
                filepath = os.path.join(self.savedir, f"{self.id}.{ext}")
                filepath = find_available_path(filepath)
            except FileExistsError as err:
                error_exit(self.log, err)
            else:
                self.output_path.update({ext: filepath})
                self.log.debug("Default output path for %s files set to %s", ext.upper(), self.output_path[ext])
Example #6
    def output_csv(self, table=None, filename=None):
        """
        Save data frame in CSV format to the default CSV output path or to 'filename'
        - table: (DataFrame or Series) alternative source data to save in the CSV
        - filename: (string) alternative name of the CSV file
        """
        if not isinstance(table, pd.DataFrame) and not isinstance(table, pd.Series):
            table = self.table.copy()

        if filename is None:
            csvpath = self.output_path['csv']
        else:
            try:
                csvpath = os.path.join(self.savedir, f"{filename}.csv")
                csvpath = find_available_path(csvpath)
            except FileExistsError as err:
                error_exit(self.log, err)
            else:
                self.log.debug("Using alternative path for CSV file output: %s", csvpath)

        # Add index names to the header row
        index_header = [idx.replace('_', ' ').title() for idx in table.index.names if idx]

        # Output data to CSV
        try:
            table.to_csv(csvpath, float_format='%.2f', header=True, index_label=index_header)
        except PermissionError:
            error_exit(f"Permission denied to save data in CSV format to {csvpath}")
        else:
            self.log.info(f"Data for '{self.title}' saved in CSV format to {csvpath}")
Example #7
 def output_html(self):
     """
     Save HTML document in self.html_page into a file
     """
     try:
         with open(self.output_path['html'], 'w') as htmlfile:
             htmlfile.write(self.html_page.prettify())
     except PermissionError as err:
         error_exit(f"Permission denied to write HTML file: {self.output_path['html']}")
     else:
         self.log.info(f"Report for '{self.title}' saved in HTML format to {self.output_path['html']}")
def parallel_exec(task,
                  label,
                  stack,
                  *args,
                  procs=None,
                  logger=None,
                  **kwargs):
    """
    Execute task in each item of stack in parallel
    Returns list with the resulting data
    - task: (method) function to pass to the parallel executor
    - label: (string) name of the task, used in log messages
    - stack: (iterable) list of items to be processed by task
    - procs: (int) number of processors
    - logger: (object) fancylogger object of the caller
    """
    if logger is None:
        logger = fancylogger.getLogger()

    # In Python 3.6, passing the logger to worker functions will make them non-picklable by ProcessPoolExecutor
    worker_logger = logger if sys.version_info >= (3, 7) else None

    data_collection = list()

    # Start process pool to execute all items in the stack
    with futures.ProcessPoolExecutor(max_workers=procs) as executor:
        task_pool = {
            executor.submit(task, item, *args, logger=worker_logger, **kwargs):
            item
            for item in stack
        }
        for pid, completed_task in enumerate(futures.as_completed(task_pool)):
            try:
                data_batch = completed_task.result()
            except futures.process.BrokenProcessPool as err:
                # In Python 3.8+ there is also the exception futures.BrokenExecutor to consider
                error_exit(logger, f"{label}: process pool executor failed")
            except futures.CancelledError as err:
                # Child processes will be cancelled if any ends in error. Ignore error.
                logger.debug(f"{label}: process {pid} cancelled successfully")
            except SystemExit as exc:
                if exc.code == 1:
                    # Child process ended in error. Cancel all remaining processes in the pool.
                    cancel_process_pool(task_pool, pid, logger)
                    # Abort execution
                    errmsg = f"{label}: process {pid} failed. Aborting!"
                    error_exit(logger, errmsg)
            else:
                # Add counters to list
                data_collection.append(data_batch)

    return data_collection
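A minimal usage sketch for parallel_exec. The worker must be a picklable top-level function that accepts a 'logger' keyword argument, since the executor always forwards one; fancylogger is assumed to be installed.

    def square(item, logger=None):
        # toy worker: returns the square of each item in the stack
        return item * item

    squares = parallel_exec(square, 'toy squares', range(8), procs=2)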
Example #9
def get_updated_record(user_record, record_validity, vsc_token, logger=None):
    """
    Return user record with up to date information
    First check local cache. If missing or outdated check VSC account page
    - user_record: (tuple) username and its account record
    - record_validity: (int) number of days that user records are valid
    - vsc_token: (string) access token to VSC account page
    - logger: (object) fancylogger object of the caller
    """
    if logger is None:
        logger = fancylogger.getLogger()

    # Unpack user record
    (username, record_data) = user_record

    # Existing user
    if record_data:
        logger.debug(f"[{username}] user account record exists in local cache")

        try:
            # Calculate age of existing record
            # Once we can use Python 3.7+, the following can be replaced with date.fromisoformat()
            record_date = datetime.strptime(record_data['updated'],
                                            '%Y-%m-%d').date()
        except ValueError as err:
            errmsg = f"[{username}] user account record in local cache is malformed"
            error_exit(logger, errmsg)
        else:
            record_age = date.today() - record_date

        if record_age.days > record_validity:
            fresh_record = get_vsc_record(username, vsc_token)
            if fresh_record:
                # Update outdated record with data from VSC account page
                record_data.update(fresh_record)
                logger.debug(
                    f"[{username}] user account record updated from VSC account page"
                )
            else:
                # Account missing in VSC account page, keep existing record in our data base
                record_data['updated'] = date.today().isoformat()
    # New user
    else:
        # Retrieve full record from VSC account page
        record_data = get_vsc_record(username, vsc_token)
        if not record_data:
            # Generate a default record for users not present in VSC account page
            record_data = user_basic_record(username)
            logger.debug(
                f"[{username}] new user account registered as member of {record_data['site']}"
            )

    return {username: record_data}
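A hedged example of calling get_updated_record with a cached entry; only the 'updated' and 'site' keys are taken from the code above, the username and token are hypothetical, and refreshing requires network access to the VSC account page:

    cached_entry = ('vsc10001', {'updated': '2020-01-01', 'site': 'brussel'})
    refreshed = get_updated_record(cached_entry, record_validity=30, vsc_token='<token>')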
Example #10
 def save_json(self):
     """
     Save contents to data file in JSON format
     """
     try:
         with open(self.datafile, 'w', encoding='utf8') as jsonfile:
             json.dump(self.contents, jsonfile, indent=4, ensure_ascii=False)
     except FileNotFoundError as err:
         error_exit(self.log, f"Data file not found: {self.datafile}")
     else:
         self.log.debug("Data saved to file: %s", self.datafile)
         return True
Example #11
 def set_units(self, units):
     """
     Change active compute units
     """
     try:
         self.active_units = self.known_units[units]
     except KeyError as err:
         errmsg = f"Unknown compute units {units}: {err}"
         error_exit(self.log, errmsg)
     else:
         self.log.debug("Compute units set to '%s'",
                        self.active_units['name'])
Example #12
 def setattr(self, target_name, local_data):
     """
     Wrapper around setattr with error handling
     - target_name: (string) name of attribute in ComputeTimeCount
     - local_data: (object) data to be saved in target attribute
     """
     try:
         setattr(self, target_name, local_data)
     except AttributeError as err:
         errmsg = f"Attribute {target_name} could not be set in ComputeTimeCount object"
         error_exit(self.log, errmsg)
     else:
         return True
Example #13
 def getattr(self, target_name):
     """
     Wrapper around getattr with error handling
     Returns existing attribute in ComputeTimeCount
     - target_name: (string) name of attribute in ComputeTimeCount
     """
     try:
         target_attr = getattr(self, target_name)
     except AttributeError as err:
         errmsg = f"Attribute {target_name} not found in ComputeTimeCount object"
         error_exit(self.log, errmsg)
     else:
         return target_attr
Example #14
    def __init__(self, datafile, mandatory=True, force_install=None):
        """
        Determine location of data file and read its contents
        Data files from package resources will be copied to user's data dir if needed
        - datafile: (string) name of the data file or full path to data file
        - mandatory: (boolean) mandatory files must already exist in user's data dir (or be installed)
        - force_install: (boolean) force copy of data file from package resources, supersedes FORCE_INSTALL
        """
        self.log = fancylogger.getLogger(name=self.__class__.__name__)

        # Fallback to FORCE_INSTALL if force_install is not set
        if force_install is None:
            force_install = FORCE_INSTALL

        if force_install:
            self.log.debug("Installation of data files is enforced")

        # Define paths holding package data files by order of preference
        # The 'data' folder in the package resources is set as a fallback location
        self.sys_data_dirs = (
            f'/etc/{DATA_DIR}',
            '/etc',
        )

        datafile = os.path.expanduser(datafile)

        if os.path.isabs(datafile):
            # Directly read data from absolute path
            self.datafile = datafile
            readable_file = True
        else:
            # Use datafile in user data directory
            self.datafile = os.path.join(appdirs.user_data_dir(DATA_DIR), datafile)
            # Copy data file from package contents (if it is missing in user's data dir or manually forced)
            # Failed copies are only fatal for mandatory data files
            try:
                self.install_pkgdata(datafile, force=force_install)
            except FileNotFoundError as err:
                readable_file = False
                if mandatory:
                    error_exit(self.log, err)
            else:
                readable_file = True

        if readable_file:
            # Read contents of data file
            try:
                self.read_data()
            except ValueError as err:
                error_exit(self.log, err)
Example #15
 def read_html(self):
     """
     Return contents of HTML file
     """
     try:
         with open(self.datafile, 'r') as htmlfile:
             htmldump = htmlfile.read()
             htmldata = BeautifulSoup(htmldump, 'lxml')
     # There are no other exceptions to check, bs4 will make anything you throw at it HTML-compliant
     except FileNotFoundError as err:
         error_exit(self.log, f"Data file not found: {self.datafile}")
     else:
         self.log.debug("Data read from file: %s", self.datafile)
         return htmldata
Example #16
 def read_json(self):
     """
     Return contents of JSON file
     """
     try:
         with open(self.datafile, 'r') as jsonfile:
             jsondata = json.load(jsonfile)
     except FileNotFoundError as err:
         error_exit(self.log, f"Data file not found: {self.datafile}")
     except json.decoder.JSONDecodeError as err:
         error_exit(self.log, f"Data file in JSON format is malformed: {self.datafile}")
     else:
         self.log.debug("Data read from file: %s", self.datafile)
         return jsondata
Example #17
    def load(self, configfile):
        """
        Load contents of configuration file
        - configfile: (string) name or path of the config file
        """
        # Determine location of config file
        self.locate_config(configfile)

        # Read contents of config file
        self.opts = configparser.ConfigParser()
        try:
            self.read()
        except FileNotFoundError as err:
            error_exit(self.log, err)

        return self
Example #18
    def init_db_cache(self):
        """
        Returns empty cache with db placeholder and default meta data
        """
        try:
            valid_days = MainConf.get_digit('userdb',
                                            'default_valid_days',
                                            fallback=30,
                                            mandatory=False)
        except (KeyError, ValueError) as err:
            error_exit(self.log, err)
        else:
            empty_db = {'valid_days': valid_days, 'db': dict()}
            self.log.info(
                f"Initialized empty data base of users with a validity of %s days",
                valid_days)

        return empty_db
Example #19
    def output_img(self, imgfmt='svg'):
        """
        Save plot image in 'imgfmt' format to the default output path
        Matplotlib object is closed after save as it is no longer needed and
        there is a limit of plot objects that can be open at the same time
        - imgfmt: (string) file format of the image
        """
        # Work with lowercase format extensions
        imgfmt = imgfmt.lower()

        # Save image file
        try:
            self.fig.savefig(self.output_path[imgfmt], format=imgfmt, bbox_inches='tight')
        except PermissionError:
            error_exit(f"Permission denied to save plot render: {imgpath}")
        else:
            self.log.info(f"Report for '{self.title}' saved in {imgfmt.upper()} format to {self.output_path[imgfmt]}")

        # Delete plot render
        plt.close(self.fig)
Example #20
 def job_seconds_to_compute(self, job_time, used_cores, days):
     """
     Returns compute time per day using the active compute units
     Warning: this function is structured to work with individual variables, pd.Series or pd.DataFrames that contain
              the following numerical parameters
     - job_time: (float) real used time in seconds
     - used_cores: (int) number of cores used during job_time
     - days: (int) number of days (used in normalized units)
     """
     try:
         total_compute_units = job_time * used_cores / self.active_units['factor']
         if self.active_units['norm']:
             daily_compute_units = total_compute_units / days
         else:
             # Non-normalized units are not divided by the number of days
             daily_compute_units = total_compute_units
     except ValueError as err:
         error_exit(
             self.log,
             f"Compute time unit conversion to {self.active_units['name']} failed: {err}"
         )
     else:
         return daily_compute_units
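A worked example of the conversion, assuming hypothetical unit settings with a 'factor' of 3600 seconds per corehour and 'norm' disabled:

    # 2-hour job (7200 s) on 8 cores: 7200 * 8 / 3600 = 16 corehours
    job_time, used_cores, factor = 7200, 8, 3600
    total_compute_units = job_time * used_cores / factor
    print(total_compute_units)  # -> 16.0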
Example #21
    def __init__(self, query_id):
        """
        Set configuration options for the queries to ElasticSearch
        Establish connection to the server
        - query_id: (int) arbitrary identification number of the query
        """
        self.log = fancylogger.getLogger(name=self.__class__.__name__)

        # Set query ID
        try:
            self.id = str(query_id)
        except ValueError as err:
            error_exit(self.log, err)

        try:
            # URL of the ElasticSearch instance
            self.servers = MainConf.get('elasticsearch',
                                        'server_url').split(',')
            # Index parameters
            self.index = {
                'name': MainConf.get('elasticsearch', 'index_name'),
                'freq': MainConf.get('elasticsearch', 'index_freq'),
                'walltime': MainConf.get('elasticsearch', 'max_walltime'),
            }
        except KeyError as err:
            error_exit(self.log, err)

        # Default field to retrieve and format of timestamps
        self.fields = ['@timestamp']
        self.timeformat = '%Y-%m-%dT%H:%M:%S.%fZ'

        try:
            self.client = Elasticsearch(hosts=self.servers)
            self.search = Search(using=self.client)
            es_cluster = self.client.cluster.health()
        except (ConnectionError, TransportError) as err:
            error_exit(
                self.log,
                f"ES query [{self.id}] connection to ElasticSearch server failed: {err}"
            )
        except ConnectionTimeout as err:
            error_exit(
                self.log,
                f"ES query [{self.id}] connection to ElasticSearch server timed out"
            )
        else:
            dbgmsg = "ES query [%s] connection established with ES cluster: %s"
            self.log.debug(dbgmsg, self.id, es_cluster['cluster_name'])
            self.log.debug("ES query [%s] status of ES cluster is %s", self.id,
                           es_cluster['status'])
Example #22
    def aggregate_perdate(self, source, selection, destination=None):
        """
        Aggregate data in selected column per each date in time interval
        Add/Update the aggregation in the destination data frame as a new column prefixed with "total_"
        - source: (string) name of ComputeTimeCounter attribute with the source data
        - selection: (string) name of column to aggregate
        - destination: (string) name of ComputeTimeCounter attribute to store aggregation
        """
        if not destination:
            destination = source

        source_data = self.getattr(source)
        dest_data = self.getattr(destination)

        # Execute aggregation per date
        try:
            aggregate = source_data.loc[:, selection].groupby('date').sum()
        except KeyError:
            errmsg = f"Aggregation per date failed: {selection} data not found in {source}"
            error_exit(self.log, errmsg)

        aggregate_name = 'total_{}'.format(selection)
        if aggregate_name in dest_data.columns:
            # Update existing data in destination
            dest_data.update(aggregate.rename(aggregate_name))
            aggregate_action = 'Updated'
        else:
            # Add aggregation as new data to destination
            dest_data = dest_data.join(aggregate.rename(aggregate_name))
            aggregate_action = 'Added'

        self.setattr(destination, dest_data)
        self.log.debug("%s aggregation of %s per date in %s succesfully",
                       aggregate_action, selection, destination)

        return True
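The aggregation above boils down to a per-date groupby sum joined back as a 'total_' column. A standalone pandas sketch with made-up data:

    import pandas as pd

    idx = pd.MultiIndex.from_product(
        [['2020-01-01', '2020-01-02'], ['ng1', 'ng2']], names=['date', 'nodegroup']
    )
    df = pd.DataFrame({'running_jobs': [3, 5, 2, 4]}, index=idx)
    totals = df.loc[:, 'running_jobs'].groupby('date').sum()
    df = df.join(totals.rename('total_running_jobs'))
    print(df)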
def make_dir(dirpath):
    """
    Create directory in dir path if it does not exist
    - dirpath: (string) absolute path to directory
    """
    try:
        check_abspath(dirpath)
    except ValueError as err:
        error_exit(logger, err)

    try:
        os.makedirs(dirpath)
    except FileExistsError:
        if os.path.isdir(dirpath):
            logger.debug("Folder already exists: %s", dirpath)
            return False
        else:
            error_exit(logger, f"Path '{dirpath}' exists but is not a folder")
    except PermissionError:
        error_exit(logger, f"Permission denied to create folder: {dirpath}")
    else:
        logger.debug("Folder successfully created: %s", dirpath)
        return True
Example #24
def main():
    # Core command line arguments
    cli_core = argparse.ArgumentParser(prog='accounting-report', add_help=False)
    cli_core.add_argument(
        '-v', '--version', action='version', version='%(prog)s from vsc-accounting-brussel v{}'.format(VERSION)
    )
    cli_core.add_argument(
        '-d', dest='debug', help='use debug log level', required=False, action='store_true'
    )
    cli_core.add_argument(
        '-i',
        dest='force_install',
        help='force (re)installation of any data files needed from package resources',
        required=False,
        action='store_true',
    )
    cli_core.add_argument(
        '-c',
        dest='config_file',
        help='path to configuration file (default: ~/.config/vsc-accounting/vsc-accounting.ini)',
        default='vsc-accounting.ini',
        required=False,
    )

    cli_core_args, cli_extra_args = cli_core.parse_known_args()

    # Debug level logs
    if cli_core_args.debug:
        fancylogger.setLogLevelDebug()
        logger.debug("Switched logging to debug verbosity")

    # Load configuration
    MainConf.load(cli_core_args.config_file)

    # Enforce (re)installation of data files
    if cli_core_args.force_install:
        dataparser.FORCE_INSTALL = True

    # Read nodegroup specs and default values
    try:
        nodegroups_spec = MainConf.get('nodegroups', 'specsheet')
        nodegroups_default = MainConf.get('nodegroups', 'default').split(',')
    except KeyError as err:
        error_exit(logger, err)
    else:
        nodegroups = DataFile(nodegroups_spec).contents

    # Reporting command line arguments
    cli = argparse.ArgumentParser(
        description='Generate accurate accounting reports about the computational resources used in an HPC cluster',
        parents=[cli_core],
    )
    cli.add_argument(
        '-s',
        dest='start_date',
        help='data retrieved from START_DATE [YYYY-MM-DD] at 00:00',
        required=True,
        type=valid_isodate,
    )
    cli.add_argument(
        '-e',
        dest='end_date',
        help='data retrieved until END_DATE [YYYY-MM-DD] at 00:00 (default: today)',
        default=date.today(),
        required=False,
        type=valid_isodate,
    )
    cli.add_argument(
        '-r',
        dest='resolution',
        help='time resolution of the accounting (default: day)',
        choices=['year', 'quarter', 'month', 'week', 'day'],
        default='day',
        required=False,
    )
    cli.add_argument(
        '-f',
        dest='report_format',
        help='format of the report document (default: SVG)',
        choices=['html', 'pdf', 'png', 'svg'],
        default='svg',
        required=False,
    )
    cli.add_argument(
        '-t', dest='csv', help='write report data table in a CSV file', required=False, action='store_true',
    )
    cli.add_argument(
        '-o',
        dest='output_dir',
        help='path to store output files (default: current working directory)',
        default=None,
        required=False,
        type=valid_dirpath,
    )
    cli.add_argument(
        '-u',
        dest="compute_units",
        help='compute time units (default: corehours)',
        choices=['corehours', 'coredays'],
        default='corehours',
        required=False,
    )
    cli.add_argument(
        '-n',
        dest='node_groups',
        help='node groups to include in the accounting report',
        choices=[*nodegroups],
        nargs='*',
        default=nodegroups_default,
        required=False,
    )
    cli.add_argument(
        'reports',
        help='accounting reports to generate',
        choices=[
            'compute-time',
            'compute-percent',
            'running-jobs',
            'unique-users',
            'peruser-compute',
            'peruser-percent',
            'peruser-jobs',
            'perfield-compute',
            'perfield-percent',
            'perfield-jobs',
            'persite-compute',
            'persite-percent',
            'persite-jobs',
            'top-users',
            'top-users-percent',
            'top-fields',
            'top-fields-percent',
            'top-sites',
            'top-sites-percent',
        ],
        nargs='+',
    )

    # Read command line arguments
    cli_args = cli.parse_args()

    # Set absolute path of output directory
    if cli_args.output_dir:
        basedir = os.path.abspath(os.path.expanduser(cli_args.output_dir))
    else:
        basedir = os.getcwd()
    logger.debug("Output directory set to: %s", basedir)

    # Convert time resolution to pandas DateOffset format
    pd_date_offsets = {'day': 'D', 'week': 'W-MON', 'month': 'MS', 'quarter': 'QS', 'year': 'AS'}
    date_offset = pd_date_offsets[cli_args.resolution]

    # Selection of node groups
    nodegroup_list = list(set(cli_args.node_groups))  # go through a set to remove duplicates

    # Account compute time on each node group in the requested period
    ComputeTime = ComputeTimeCount(
        cli_args.start_date, cli_args.end_date, date_offset, compute_units=cli_args.compute_units
    )

    for ng in nodegroup_list:
        logger.info("Processing jobs on %s nodes...", ng)
        ComputeTime.add_nodegroup(ng, nodegroups[ng]['cores'], nodegroups[ng]['hosts'])

    # Colors of each nodegroup
    plot_colors = {ng: nodegroups[ng]['color'] for ng in nodegroup_list}

    # Generate requested accounting reports
    report_save = [basedir, cli_args.report_format, cli_args.csv]
    report_generators = {
        'compute-time': (report.compute_time, [ComputeTime, plot_colors] + report_save),
        'compute-percent': (report.compute_percent, [ComputeTime, plot_colors] + report_save),
        'running-jobs': (report.global_measure, [ComputeTime, 'Running Jobs', plot_colors] + report_save),
        'unique-users': (report.global_measure, [ComputeTime, 'Unique Users', plot_colors] + report_save),
        'peruser-compute': (report.aggregates, [ComputeTime, 'User', 'Compute', False, plot_colors] + report_save),
        'peruser-percent': (report.aggregates, [ComputeTime, 'User', 'Compute', True, plot_colors] + report_save),
        'peruser-jobs': (report.aggregates, [ComputeTime, 'User', 'Jobs', False, plot_colors] + report_save),
        'perfield-compute': (report.aggregates, [ComputeTime, 'Field', 'Compute', False, plot_colors] + report_save),
        'perfield-percent': (report.aggregates, [ComputeTime, 'Field', 'Compute', True, plot_colors] + report_save),
        'perfield-jobs': (report.aggregates, [ComputeTime, 'Field', 'Jobs', False, plot_colors] + report_save),
        'persite-compute': (report.aggregates, [ComputeTime, 'Site', 'Compute', False, plot_colors] + report_save),
        'persite-percent': (report.aggregates, [ComputeTime, 'Site', 'Compute', True, plot_colors] + report_save),
        'persite-jobs': (report.aggregates, [ComputeTime, 'Site', 'Jobs', False, plot_colors] + report_save),
        'top-users': (report.top_users, [ComputeTime, False] + report_save),
        'top-users-percent': (report.top_users, [ComputeTime, True] + report_save),
        'top-fields': (report.top_fields, [ComputeTime, False] + report_save),
        'top-fields-percent': (report.top_fields, [ComputeTime, True] + report_save),
        'top-sites': (report.top_sites, [ComputeTime, False] + report_save),
        'top-sites-percent': (report.top_sites, [ComputeTime, True] + report_save),
    }

    for requested_report in cli_args.reports:
        report_generators[requested_report][0](*report_generators[requested_report][1])
Example #25
    def html_dataframe(self, table):
        """
        Format DataFrame into an HTML table, generating a complete HTML document
        - table: (DataFrame) source data for the HTML table
        """
        # Work on a local copy of data table
        table = table.copy()

        # Format any Datetime indexes to ISO format
        for level in range(table.index.nlevels):
            idx = table.index.unique(level=level)
            if isinstance(idx, pd.DatetimeIndex):
                idx = idx.strftime('%Y-%m-%d')
                if table.index.nlevels > 1:
                    table.index = table.index.set_levels(idx, level=level)
                else:
                    table = table.set_index(idx)
                self.log.debug("HTML page: dates in index formatted in ISO format")

        # CSS style: take from file defined in configuration
        table_css_file = MainConf.get(
            'reports', 'html_table_cssfile', fallback='html_table_style.json', mandatory=False
        )
        table_css = DataFile(table_css_file, mandatory=True).contents
        self.log.debug(f"HTML page: added stylist rules to table from file: {table_css_file}")

        # CSS style: table zebra pattern
        zebra_bg = ('background', 'whitesmoke')
        if table.index.nlevels == 1:
            # Intermittent shading of single rows
            zebra_css = [{'selector': 'tbody tr:nth-of-type(odd)', 'props': [zebra_bg]}]
            self.log.debug("HTML page: applied zebra shading to every other row")
        else:
            # Intermittent shading of all rows belonging to each element in the root index level
            rows = np.prod([len(level) for level in table.index.levels[1:]])
            zebra_css = [
                {'selector': f"tbody tr:nth-of-type({rows * 2}n-{shift})", 'props': [zebra_bg]} for shift in range(rows)
            ]
            self.log.debug("HTML page: applied zebra shading to every %s rows", rows)
        table_css.extend(zebra_css)

        # Delete names of each index level as it adds a second TH row
        table.index.names = [None for name in table.index.names]
        # Delete names of each column level as those would also be printed along the column headers
        table.columns.names = [None for name in table.columns.names]

        # Format numbers
        table_format = dict()
        for column in table.columns:
            # Use names from all column levels
            if table.columns.nlevels > 1:
                column_name = " ".join(column)
            else:
                column_name = column

            if re.search(r'\(coredays.*\)', column_name):
                table_format.update({column: '{:.1f}'})
            elif re.search(r'\(.*%\)', column_name):
                table_format.update({column: '{:.2%}'})
            elif re.search(r'\(.*\)', column_name):
                # by default display data with units as integers
                table_format.update({column: '{:.0f}'})
            else:
                # data without units are treated as is
                table_format.update({column: '{}'})
        self.log.debug("HTML page: number formatting set per column of table to %s", table_format)

        # Get extra padding from configuration setting
        try:
            column_xtrlen = MainConf.get_digit('reports', 'html_table_extrapadding', fallback=2, mandatory=False)
        except (KeyError, ValueError) as err:
            error_exit(self.log, err)
        else:
            self.log.debug("HTML page: table cells extra padding set to %s", column_xtrlen)

        # Set lengths for each column based on formatted maximum value
        column_maxlen = [len(table_format[col].format(val)) for col, val in table.max(axis=0).to_dict().items()]
        column_width = [
            {'selector': f".col{col}", 'props': [('width', f"{column_maxlen[col] + column_xtrlen}em")]}
            for col in range(table.shape[1])
        ]
        table_css.extend(column_width)

        self.log.debug("HTML page: table column widths adjusted to %s", column_width)

        # Heatmap for data corresponding with the plot
        if self.yunits == '%':
            # color grade all columns with percentual data
            unitlabel = f"({self.yunits})"
            if table.columns.nlevels > 1:
                graded_cols = [col for col in table.columns if unitlabel in ''.join(col)]
            else:
                graded_cols = [col for col in table.columns if unitlabel in col]
            self.log.debug("HTML page: color graded all columns in table")
        elif self.ylab in table.columns:
            # color grade columns with data of plot
            graded_cols = [self.ylab]
            self.log.debug("HTML page: color graded column '%s'", self.ylab)
        else:
            graded_cols = None
            self.log.debug("HTML page: no color grading applied")

        # Data table printout
        table_styled = table.style.format(table_format).set_table_styles(table_css)
        self.log.debug("HTML page: table CSS style applied")

        if graded_cols:
            # Note: background_gradient accepts axis=None in pandas 0.25 and vmax in pandas 1.0
            # .background_gradient(cmap='YlGnBu', axis=None, subset=dataframe_slice, vmax=num)
            table_styled = table_styled.background_gradient(cmap='YlGnBu', axis='index', subset=graded_cols)
            self.log.debug("HTML page: table color gradient applied")

        table_html = table_styled.render()

        # Parse table html
        table_soup = BeautifulSoup(table_html, 'lxml')

        # Merge cells with equal total values for all nodegroups
        th0 = table_soup.tbody.select('th.row_heading.level0')
        rowspan = int(th0[0]['rowspan']) if th0[0].has_attr('rowspan') else 1
        ngtotals = [f"col{col}" for col, name in enumerate(table.columns) if 'Total' in name]
        # Only proceed if level 0 index has rowspan and columns named 'Total' exist
        if rowspan > 1 and len(ngtotals) > 0:
            for ngtotal in ngtotals:
                column_total = table_soup.tbody.find_all('td', ngtotal)
                # Check if values in first group of rows are equal (assumes same topology across the column)
                firstrow = [cell.string for cell in column_total[0:rowspan]]
                if all(cell == firstrow[0] for cell in firstrow):
                    # Add rowspan to each top cell
                    for row in range(0, len(column_total), rowspan):
                        column_total[row]['rowspan'] = rowspan
                        # Delete redundant cells
                        for span in range(1, rowspan):
                            column_total[row + span].decompose()
                    self.log.debug("HTML page: cells in column '%s' fusioned succesfully", ngtotal[3:])

        return table_soup
Example #26
    def __init__(self, title, table, ymax=None, colors=None, legend=None):
        """
        Initialize plot including axes, labels and legend
        > Plot object (matplotlib) is accessible in self.fig and self.ax
        > HTML page (beautifulsoup) is accessible in self.html_page
        - title: (string) main title of the plot
        - table: (DataFrame) data source for the plot
        - ymax: (numeric) maximum value of the Y axis
        - colors: (list of strings) color codes for each plot element
        - legend: (list of strings) alternative text elements of the legend

        Note: No default render() function is defined. It is declared in child classes depending on the plot type.
        """
        self.log = fancylogger.getLogger(name=self.__class__.__name__)

        # Plot title
        try:
            cluster_name = MainConf.get('nodegroups', 'cluster_name')
        except KeyError as err:
            error_exit(self.log, err)
        else:
            self.title = f"{cluster_name}: {title}"

        # General plot format settings
        format_configs = dict()
        for format_config in ['plot_dpi', 'plot_fontsize']:
            try:
                format_value = MainConf.get_digit('reports', format_config)
            except (KeyError, ValueError) as err:
                error_exit(self.log, err)
            else:
                format_configs.update({format_config: format_value})

        # Font sizes are relative to 'plot_fontsize' configuration
        format_fontsize_mod = {
            'axes.titlesize': 4,
            'axes.labelsize': 0,
            'xtick.labelsize': -2,
            'ytick.labelsize': -2,
            'legend.fontsize': -4,
        }
        format_params = {fp: format_configs['plot_fontsize'] + fmod for fp, fmod in format_fontsize_mod.items()}
        # Add DPI setting
        format_params.update({'figure.dpi': format_configs['plot_dpi']})
        # Apply formats globally
        plt.rcParams.update(format_params)
        self.log.debug("Plot formatting set succesfully: %s", format_params)

        # Make local copy of data for the plot
        try:
            self.check_df(table)
        except TypeError as err:
            error_exit(self.log, err)
        else:
            self.table = table.copy()
            self.log.debug("Plot data table copied succesfully")

        # Plot date range
        if 'date' in self.table.index.names:
            dateidx = self.table.index.get_level_values('date').unique()
            self.datelim = (dateidx[0].date(), dateidx[-1].date())
            self.log.debug("Plot data range: %s to %s", *self.datelim)
        else:
            self.datelim = None

        # Plot measure is first column in index level 0
        if table.columns.nlevels > 1:
            self.ylab = self.table.columns.get_level_values(0)[0]
        else:
            self.ylab = self.table.columns[0]
        # Y axis scale and labels
        self.ymax = ymax
        self.yunits = re.search(r'\((.*?)\)', self.ylab)
        if self.yunits:
            self.yunits = self.yunits.group(1)
        # X axis labels
        self.xfreq = self.date_freq()
        self.xlab = f"Date ({self.xfreq})"
        self.log.debug("Plot labels: [X] %s [Y] %s", self.xlab, self.ylab)

        # Plot legend
        self.colors = colors
        self.legend = legend

        # Set plot ID from plot title plus index interval
        self.set_id()

        # Make the plot
        self.render()
        self.set_xaxis()
        self.set_yaxis()
        self.add_legend()
Example #27
def global_measure(ComputeTime,
                   selection,
                   colorlist,
                   savedir,
                   plotformat,
                   csv=False):
    """
    Total of the selected measure in GlobalStats over the given period
    Plot upper limit is the maximum total of the measure in the period
    - ComputeTime: (ComputeTimeFrame) source data for the plot
    - selection: (string) matching name of column to be plotted
    - colorlist: (dict) colors for each plot stack
    - savedir: (string) path of directory to store output
    - plotformat: (string) image format of the plot
    - csv: (boolean) save data used for the plot in CSV format
    """
    logger.info("Generating accounting report on %s...",
                selection.replace('_', ' '))
    plot = dict()

    # Name of selection has to be capitalized
    selection = selection.title()

    # Sum jobs and users per time period
    ComputeTime.aggregate_perdate('GlobalStats', 'running_jobs')
    ComputeTime.aggregate_perdate('GlobalStats', 'unique_users')

    # Full data table for the plot
    table_columns = [
        'compute_time', 'running_jobs', 'total_running_jobs', 'unique_users',
        'total_unique_users'
    ]
    table = ComputeTime.GlobalStats.loc[:, table_columns]

    # Format columns in the table
    units = [
        ComputeTime.compute_units['name'], 'jobs/day', 'jobs/day', 'users/day',
        'users/day'
    ]
    table = table.rename(columns=simple_names_units(table_columns, units))
    logger.debug("Data included in the report: %s", ", ".join(table.columns))

    # Data selection for the plot
    plot_data = [
        column for column in table.columns if re.match(selection, column)
    ]
    try:
        plot['table'] = table.loc[:, plot_data]
    except KeyError:
        error_exit(f"Data column for '{selection}' not found in GlobalStats")
    else:
        logger.debug("Data used in the plot: %s",
                     ", ".join(plot['table'].columns))

        plot['ymax'] = max(table.loc[:, plot_data[0]].groupby('date').sum())
        logger.debug("Maximum value of the plot: %s %s",
                     '{:.2f}'.format(plot['ymax']), plot['table'].columns[0])

    # Set colors for each nodegroup in the stack plot
    plot['colors'] = [
        colorlist[ng]
        for ng in ComputeTime.GlobalStats.index.unique(level='nodegroup')
    ]

    # Plot title: first column name without units
    plot['title'] = re.sub(r'\((.*?)\)', '', plot['table'].columns[0]).rstrip()

    # Render plot
    stackplot = PlotterStack(**plot)

    # Output: file paths
    stackplot.set_output_paths(savedir)
    # Output: render HTML document including plot data table
    if plotformat == 'html':
        table_title = "{} stats per nodegroup".format(
            stackplot.xfreq.capitalize())
        stackplot.html_makepage()
        stackplot.html_addtable(table, table_title)
    # Output: save files
    stackplot.save_plot(plotformat)
    if csv:
        stackplot.output_csv(table)
Example #28
    def add_nodegroup(self, nodegroup, cores, hostlist):
        """
        Add the definition of a new node group to the accounting of stats
        - nodegroup: (string) name of the new group of nodes
        - cores: (integer) number of cores per node
        - hostlist: (list of dicts) each element should include
                    {regex: pattern of hostnames, n: number of nodes, start: date string, end: date string}
        """
        # Check number of cores
        if str(cores).isdigit():
            self.log.debug("'%s' cores per host: %s", nodegroup, cores)
        else:
            errmsg = f"Cores per host of nodegroup '{nodegroup}' are not a positive integer"
            error_exit(self.log, errmsg)

        # Update nodegroup host list with cores per node and add missing start and end datetimes
        for n, host in enumerate(hostlist):
            hostlist[n].update({'cores': cores})
            try:
                hostlist[n]['start'] = pd.Timestamp(
                    host.get('start', date(2018, 1, 1)))
                hostlist[n]['end'] = pd.Timestamp(host.get(
                    'end', date.today()))
            except ValueError as err:
                errmsg = f"Dates of host {n} in nodegroup '{nodegroup}' are not in ISO format"
                error_exit(self.log, errmsg)
            else:
                dates_str = (
                    hostlist[n]['start'].strftime(self.dateformat),
                    hostlist[n]['end'].strftime(self.dateformat),
                )
                self.log.debug("'%s' host %s active period: %s to %s",
                               nodegroup, n, *dates_str)

        # Add group of nodes
        self.NG.update({nodegroup: hostlist})
        self.log.debug("'%s' nodegroup succesfully defined", nodegroup)

        # Create corresponding indexes for this group of nodes
        multidx = ['date', 'nodegroup']
        ng_index = pd.MultiIndex.from_product([self.dates, [nodegroup]],
                                              names=multidx)
        self.index = self.index.append(ng_index)

        # Start with capacity stats of this nodegroup
        ng_capacity = pd.DataFrame(
            [self.update_capacity(*dt) for dt in ng_index])
        ng_capacity = ng_capacity.set_index(multidx)
        self.log.debug("'%s' updated %s capacity records", nodegroup,
                       ng_capacity.shape[0])

        # Retrieve compute stats of this nodegroup
        ng_compute = parallel_exec(
            count_computejobsusers,  # worker function
            f"'{nodegroup}' compute/job counter",  # label prefixing log messages
            ng_index.levels[0],  # stack of items to process
            (nodegroup, self.NG[nodegroup]
             ),  # nodegroup_spec: forwarded to worker function
            procs=self.max_procs,
            logger=self.log,
            peruser=True,  # forwarded to worker function
        )
        # Serial version
        # ng_compute = [count_computejobsusers(n, *dt, peruser=True) for (n, dt) in enumerate(ng_index)]
        self.log.debug("'%s' retrieved %s compute time data records",
                       nodegroup, len(ng_compute))

        # Unpack compute stats and create data frame with global compute stats
        ng_global, ng_peruser = zip(*ng_compute)
        ng_global = pd.DataFrame(ng_global).set_index(multidx)
        ng_global = pd.merge(ng_capacity,
                             ng_global,
                             left_index=True,
                             right_index=True,
                             sort=True)
        self.GlobalStats = self.GlobalStats.combine_first(ng_global)
        self.log.debug("'%s' Global stats completed with %s data records",
                       nodegroup, self.GlobalStats.shape[0])

        # Unpack user stats and create data frame with user compute time and jobs
        ng_peruser = [(record['compute'], record['jobs'])
                      for record in ng_peruser]
        ng_peruser_compute, ng_peruser_jobs = zip(*ng_peruser)
        ng_peruser_compute = pd.DataFrame(ng_peruser_compute).set_index(
            multidx)
        ng_peruser_jobs = pd.DataFrame(ng_peruser_jobs).set_index(multidx)
        ng_peruser_counters = [('Compute', ng_peruser_compute),
                               ('Jobs', ng_peruser_jobs)]

        # Update list of active users with users from this nodegroup
        ng_users = set(ng_peruser_compute.columns)
        self.UserList.update(ng_users)
        self.log.debug("'%s' %s unique users added to accounting", nodegroup,
                       len(ng_users))

        # Retrieve account data for users in this nodegroup
        ng_user_accounts = pd.DataFrame.from_dict(UserDB(ng_users).records,
                                                  orient='index')
        ng_user_accounts.index.name = 'user'
        self.UserAccounts = self.UserAccounts.combine_first(ng_user_accounts)

        # Update user data and generate aggregates per field and site
        for counter_name, counter_data in ng_peruser_counters:
            # Order data by date
            counter_data.sort_index(level='date', ascending=True, inplace=True)
            # Add to respective data frame
            UserCounts = self.getattr('User' + counter_name)
            UserCounts = UserCounts.combine_first(counter_data).fillna(0)
            self.setattr('User' + counter_name, UserCounts)
            dbgmsg = "'%s' User %s stats completed with %s data records for %s users"
            self.log.debug(dbgmsg, nodegroup, counter_name.lower(),
                           len(counter_data.index), len(counter_data.columns))

            for category in ['Field', 'Site']:
                # Aggregate user data per category
                ng_percategory = self.aggregate_account_category(
                    counter_data, ng_user_accounts, category)
                aggregate_counts = (len(counter_data.columns),
                                    len(ng_percategory.columns))
                infomsg = "'%s' adding %s aggregates for %s users in %s '%s' categories"
                self.log.info(infomsg, nodegroup, counter_name.lower(),
                              *aggregate_counts, category)
                # Add aggregate to global data structure
                CategoryCounts = self.getattr(category + counter_name)
                CategoryCounts = CategoryCounts.combine_first(
                    ng_percategory).fillna(0)
                self.setattr(category + counter_name, CategoryCounts)
                # Update list of categories
                CategoryList = self.getattr(category + 'List')
                CategoryList.update(ng_percategory.columns)
                self.setattr(category + 'List', CategoryList)
Example #29
    def __init__(self,
                 date_start,
                 date_end,
                 date_freq,
                 compute_units='corehours'):
        """
        Initialize data frames for the provided period of time
        - date_start, date_end: (date) limits of the period of time
        - date_freq: (string) pandas DateOffset alias defining the frequency of time entries
        - compute_units: (string) units used to account compute time
        """
        self.log = fancylogger.getLogger(name=self.__class__.__name__)

        # Set global compute units and keep a local reference
        ComputeUnits.set_units(compute_units)
        self.compute_units = ComputeUnits.active_units

        # Use global date format
        self.dateformat = DATE_FORMAT

        # Set range of dates
        try:
            self.dates = self.set_dates(date_start, date_end, date_freq)
        except ValueError as err:
            error_exit(self.log, err)

        # Set number of procs for parallel processing from configuration file
        try:
            self.max_procs = MainConf.get_digit('nodegroups',
                                                'max_procs',
                                                fallback=None,
                                                mandatory=False)
        except (KeyError, ValueError) as err:
            error_exit(self.log, err)
        else:
            self.log.debug("Maximum number of processor set to %s",
                           self.max_procs)

        # Specifications of each groups of nodes
        self.NG = dict()

        # Index both dates and nodegroups (empty unless nodegroups are added)
        self.index = pd.MultiIndex.from_product([self.dates, []],
                                                names=['date', 'nodegroup'])

        # Compute time indexing both dates and nodegroups
        self.GlobalStats = pd.DataFrame(columns=[
            'capacity', 'compute_time', 'running_jobs', 'unique_users'
        ],
                                        index=self.index)

        # Aggregate stats (columns are dynamically added for each section)
        for section in ['User', 'Field', 'Site']:
            self.setattr(section + 'List', set())
            self.setattr(section + 'Compute', pd.DataFrame({},
                                                           index=self.index))
            self.setattr(section + 'Jobs', pd.DataFrame({}, index=self.index))

        # User account data
        self.UserAccounts = pd.DataFrame(
            columns=['user', 'field', 'site', 'updated'])
        self.UserAccounts = self.UserAccounts.set_index('user')

        self.log.debug("Global and aggregate data structures initialized")
Example #30
def aggregates(ComputeTime,
               aggregate,
               selection,
               percent,
               colorlist,
               savedir,
               plotformat,
               csv=False):
    """
    Compute time used by each entity in the chosen aggregate during the time period
    Gives insight into the resources used by each entity
    Plot upper limit is the maximum compute time of the entity over all nodegroups
    - ComputeTime: (ComputeTimeFrame) source data for the plot
    - aggregate: (string) name of the aggregate data
    - selection: (string) name of the accounted data
    - percent: (boolean) plot percentual compute time
    - colorlist: (dict) colors for each plot stack
    - savedir: (string) path of directory to store output
    - plotformat: (string) image format of the plot
    - csv: (boolean) save data used for the plot in CSV format
    """
    # Names of aggregate and selection have to be capitalized
    aggregate = aggregate.title()
    selection = selection.title()

    # Source data for selected accounting and aggregate
    try:
        sources = source_data(selection, aggregate,
                              ComputeTime.compute_units['name'])
    except AttributeError as err:
        error_exit(logger, err)

    # List of entities in this aggregation
    aggregate_list = sorted(ComputeTime.getattr(aggregate + 'List'))
    # Add total compute time per time interval
    ComputeTime.aggregate_perdate('GlobalStats', sources['reference'],
                                  sources['aggregate'])
    # Calculate percentage compute time per entity
    for entity in aggregate_list:
        ComputeTime.add_percentage(sources['aggregate'], entity,
                                   sources['total'], f"{entity} - percent")
    # Grab stats for this aggregate
    AggregateStats = ComputeTime.getattr(sources['aggregate'])

    # Render plots for each entity
    plot = dict()

    # Set colors for each nodegroup in the stack plot
    plot['colors'] = [
        colorlist[ng]
        for ng in ComputeTime.GlobalStats.index.unique(level='nodegroup')
    ]

    # Iterate over each entity
    for entity in aggregate_list:
        logger.info("Generating accounting report on %s by %s: %s...",
                    selection, aggregate, entity)

        # Full data table for the plot
        entity_perc = f"{entity} - percent"
        table = AggregateStats.loc[:, [entity, entity_perc, sources['total']]]

        # Format columns in the table
        counter_name = sources['reference'].replace('_', ' ').title()
        column_names = {
            entity: f"{counter_name} of {entity} ({sources['units']})",
            entity_perc: f"{counter_name} of {entity} (%)",
            sources['total']: f"Total {counter_name} ({sources['units']})",
        }
        table = table.rename(columns=column_names)
        logger.debug("Data included in the report: %s",
                     ", ".join(table.columns))

        # Plot title and data selection
        if percent:
            plot['title'] = f"Relative {counter_name} of {entity}"
            plot['table'] = table.loc[:, [column_names[entity_perc]]]
        else:
            plot['title'] = f"{counter_name} of {entity}"
            plot['table'] = table.loc[:, [column_names[entity]]]
        logger.debug("Data used in the plot: %s",
                     ", ".join(plot['table'].columns))

        # Max value is set to the maximum in the plot to avoid empty plots due to exaggerated scales
        plot['ymax'] = plot['table'].iloc[:, 0].groupby('date').sum().max()
        ymax_fmt = '{:.2%}' if percent else '{:.2f}'
        logger.debug("Maximum value of the plot: %s %s",
                     ymax_fmt.format(plot['ymax']), plot['table'].columns[0])

        # Render plot
        stackplot = PlotterStack(**plot)

        # Output: file paths
        stackplot.set_output_paths(savedir)
        # Output: render HTML document including plot data table
        if plotformat == 'html':
            table_title = "{} stats per nodegroup".format(
                stackplot.xfreq.capitalize())
            stackplot.html_makepage()
            stackplot.html_addtable(table, table_title)
        # Output: save files
        stackplot.save_plot(plotformat)
        if csv:
            stackplot.output_csv(table)