def test_initlogger(self):
    """ Test function for init_logger """
    my_logger = logger.init_logger()
    self.assertEqual(my_logger.level, logging.DEBUG)
    self.assertEqual(my_logger.name, logger.__name__)
    # Even if the loglevel is set to a different value, the logger loglevel
    # stays in 'DEBUG'
    my_logger = logger.init_logger(loglevel=logging.INFO, name='testset')
    self.assertEqual(my_logger.level, logging.DEBUG)
    self.assertEqual('testset', my_logger.name)
def __init__(self, alldays=False, logger=None, loglevel=None, nologs=False,
             safe=False, settings_file=None, **kwargs):
    self.alldays = alldays
    self.conf = arguments.read_config(settings_file)
    self.data = pd.DataFrame()
    self.filecache = {}
    self.logger = logger or init_logger(loglevel)
    self.logs = {}
    self.nologs = nologs
    self.results_queue = queue.Queue()
    self.safe = safe
    self.settings_file = settings_file or arguments.DEFAULT_SETTINGS_FILE
    self.server = None
    self.systems = [item for item in self.conf.sections()
                    if item not in ['GATEWAY', 'MISC']]
    add_methods_to_pandas_dataframe(self.logger)
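# Usage sketch for the constructor above: a minimal example, assuming it is
# Collector.__init__ from t4mon.collector (as the test imports below suggest);
# the settings file is the TEST_CONFIG fixture defined further down.
from t4mon.collector import Collector

col = Collector(alldays=True, nologs=True,
                settings_file='test/test_settings.cfg')
# All sections of the settings file except GATEWAY and MISC are systems
print(col.systems)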
def select(dataframe, *args, **kwargs):
    """
    Get view of selected variables that match columns from the dataframe.

    Arguments:
        dataframe (pandas.DataFrame): Input data
        \*args (List[str]): List of regular expressions selecting column names

    Keyword Arguments:
        filter (str): Filter based on the index level and content, only one
            filter allowed. Example: ``system='SYSTEM1'``
        excluded (List[str]): Exclusion list, items matching this list (case
            insensitive) will not be selected.
        logger (logging.Logger): Optional logger instance

    Returns:
        ``pandas.DataFrame``
    """
    logger = kwargs.pop('logger', '') or init_logger()
    excluded = kwargs.pop('excluded', None)
    (ix_level, filter_by) = kwargs.popitem() if kwargs else (None, None)
    ix_levels = [level.upper() for level in dataframe.index.names if level]
    if ix_level and ix_level.upper() not in ix_levels:
        logger.warning('Bad filter found: "{0}" not found in index '
                       '(case insensitive)'.format(ix_level))
        return pd.DataFrame()
    if ix_level:
        ix_level = _find_in_iterable_case_insensitive(
            iterable=dataframe.index.names,
            name=ix_level)
        try:
            if not filter_by:  # fallback if filter_by is not a valid value
                filter_by = dataframe.index.get_level_values(
                    ix_level).unique()[0]
            filter_by = _find_in_iterable_case_insensitive(
                iterable=dataframe.index.get_level_values(ix_level),
                name=filter_by)
            _df = dataframe.xs(filter_by,
                               level=ix_level) if filter_by else pd.DataFrame()
        except KeyError:
            logger.warning('Value: "{0}" not found in index level '
                           '"{1}"!'.format(filter_by, ix_level))
            return pd.DataFrame()
    else:
        _df = dataframe
    # Drop all columns that have all values missing; dropna returns a copy,
    # so the result must be reassigned
    _df = _df.dropna(axis=1, how='all')
    if len(args) == 0:
        logger.warning('No variables were selected, returning all '
                       'columns{0}'.format(' for level {0}={1}'.format(
                           ix_level, filter_by) if filter_by else ''))
        return _df
    return _df[get_matching_columns(_df, *args, excluded=excluded)]
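# Usage sketch for select(). The dataframe shape, index level and column
# names are illustrative assumptions, not taken from the code above.
import pandas as pd

index = pd.MultiIndex.from_product(
    [pd.date_range('2021-01-01', periods=3), ['SYSTEM1', 'SYSTEM2']],
    names=['datetime', 'system'])
sample = pd.DataFrame({'cpu_user': range(6),
                       'cpu_system': range(6),
                       'memory_free': range(6)},
                      index=index)

# Columns matching 'cpu' for SYSTEM1 only (both filters case insensitive)
cpu_sys1 = select(sample, 'cpu', system='system1')
# Columns matching 'cpu', excluding any column that also matches 'system'
cpu_user_only = select(sample, 'cpu', excluded=['system'])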
def plot_var_by_system(dataframe, *var_names, **optional):
    """
    Replace pandas' ``DataFrame.plot()`` to allow plotting different systems
    in the same axis.

    ``var_names`` columns are selected for each system in the dataframe and
    ``matplotlib.pyplot``'s plot function is used once for each column.
    """
    logger = optional.pop('logger', '') or init_logger()
    plotaxis = optional.pop('ax', None) or plt.figure().gca()
    cmap = optional.pop('cmap', DFLT_COLORMAP)
    systems = dataframe.index.get_level_values('system').unique()
    for system in systems:
        sel = df_tools.select(dataframe,
                              *var_names,
                              system=system,
                              logger=logger)
        if sel.empty:  # other systems may have this column with some data
            continue
        # # Remove outliers (>3 std away from mean)
        # sel = df_tools.remove_outliers(sel.dropna(), n_std=3)
        for item in sel.columns:
            logger.debug('Drawing item: {0} ({1})'.format(item, system))
            plotaxis = sel[item].plot(label='{0} {1}'.format(item, system),
                                      **optional)
    update_colors(plotaxis, cmap)
    return plotaxis
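# Usage sketch for plot_var_by_system(): overlay every system's 'cpu.*'
# columns in a single axis. Data layout and names are illustrative
# assumptions, not taken from the code above.
import pandas as pd

index = pd.MultiIndex.from_product(
    [pd.date_range('2021-01-01', periods=5), ['SYSTEM1', 'SYSTEM2']],
    names=['datetime', 'system'])
data = pd.DataFrame({'cpu_user': range(10)}, index=index)

axis = plot_var_by_system(data, 'cpu.*')
axis.figure.savefig('cpu_by_system.png')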
def plot_var(dataframe, *args, **kwargs):
    """
    Plot the specified variable names from the dataframe, overlaying all
    plots for each variable and silently skipping non-existing variables.
    Inherently remove outliers that are more than +/- 3 standard deviations
    away from the mean.

    Arguments:
        dataframe (pandas.DataFrame): Source of the data to be plotted

        \*args (str): Filter column names that match any item in ``\*args``;
            each item can represent a valid regular expression.

    Keyword Arguments:
        system (Optional[str]): select which system to filter on
            (i.e. ``system='localhost'``)

        **kwargs (Optional): Keyword parameters passed transparently to pyplot

    Return:
        matplotlib.axes.Axes
    """
    logger = kwargs.pop('logger', '') or init_logger()
    try:
        system_filter = kwargs.pop('system', '')
        assert not dataframe.empty
        # If we filter by system: only first column in var_names will be
        # selected, dataframe.plot() function will be used.
        if system_filter:
            sel = df_tools.select(dataframe,
                                  *args,
                                  system=system_filter,
                                  logger=logger)
            if sel.empty:
                raise TypeError
            # Remove outliers (>3 std away from mean)
            sel = df_tools.remove_outliers(sel.dropna(axis=1, how='all'),
                                           n_std=3)
            plotaxis = sel.plot(**kwargs)
            update_colors(plotaxis, kwargs.get('cmap', DFLT_COLORMAP))
        else:
            plotaxis = plot_var_by_system(dataframe, *args, **kwargs)
        # Style the resulting plot axis and legend
        plotaxis.xaxis.set_major_formatter(md.DateFormatter('%d/%m/%y\n%H:%M'))
        plotaxis.legend(loc='best')
        return plotaxis
    except (TypeError, AssertionError):
        logger.error('{0}{1} not drawn{2}'.format(
            '{0} | '.format(system_filter) if system_filter else '',
            args,
            ' for this system' if system_filter else ''))
    except Exception as exc:
        _, _, exc_tb = sys.exc_info()
        logger.error('Exception at plot_var (line {0}): {1}'.format(
            exc_tb.tb_lineno, repr(exc)))
    # Return an empty figure if an exception was raised
    return plt.figure().gca()
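# Usage sketch for plot_var() with a system filter, reusing the sample data
# layout from the plot_var_by_system sketch above; extra keyword arguments
# (here linestyle) pass straight through to pyplot. Names are illustrative.
import pandas as pd

index = pd.MultiIndex.from_product(
    [pd.date_range('2021-01-01', periods=5), ['SYSTEM1', 'SYSTEM2']],
    names=['datetime', 'system'])
data = pd.DataFrame({'cpu_user': range(10)}, index=index)

axis = plot_var(data, 'cpu.*', system='SYSTEM1', linestyle='--')
axis.figure.savefig('cpu_system1.png')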
def __setstate__(self, state):
    """ Method enabling class pickle """
    state['logger'] = init_logger(name=state.get('loggername'))
    if 'loggername' in state:
        del state['loggername']
    state['results_queue'] = queue.Queue()
    state['server'] = None
    self.__dict__.update(state)
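# Hypothetical sketch of the matching __getstate__, assuming the intent is to
# drop the unpicklable members (logger, queue, server) that __setstate__
# rebuilds above, keeping only the logger name across the pickle round trip:
def __getstate__(self):
    """ Method enabling class pickle """
    state = self.__dict__.copy()
    state['loggername'] = self.logger.name
    for unpicklable in ('logger', 'results_queue', 'server'):
        state.pop(unpicklable, None)
    return state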
def test_init_logger_has_two_handlers(self):
    my_logger = logger.init_logger()
    self.assertEqual(len(my_logger.handlers), 2)
    # Check there's one StreamHandler and one FileHandler
    self.assertTrue(any(isinstance(_handler, logging.StreamHandler)
                        for _handler in my_logger.handlers))
    self.assertTrue(any(isinstance(_handler,
                                   handlers.TimedRotatingFileHandler)
                        for _handler in my_logger.handlers))
    # Check that file handler's loglevel is the same as logger loglevel
    # Check that console handler's loglevel is the same as specified
    for _handler in my_logger.handlers:
        if isinstance(_handler, handlers.TimedRotatingFileHandler):
            self.assertEqual(_handler.level, my_logger.level)
        else:
            self.assertEqual(_handler.level, logger.DEFAULT_LOGLEVEL)
def __init__(self, container, system, logger=None):
    self.system = system
    # Transparently pass all container items
    for item in container.__dict__:
        setattr(self, item, getattr(container, item))
    if 'loglevel' not in self.__dict__:
        self.loglevel = logger.DEFAULT_LOGLEVEL
    self.logger = logger or init_logger(self.loglevel)
    current_date = dt.datetime.strptime(self.date_time,
                                        "%d/%m/%Y %H:%M:%S")
    self.year = current_date.year
    # populate self.html_template and self.graphs_definition_file
    conf = arguments.read_config(self.settings_file)
    for item in ['html_template', 'graphs_definition_file']:
        setattr(self,
                item,
                arguments.get_absolute_path(conf.get('MISC', item),
                                            self.settings_file))
def dataframize(data_file, session=None, logger=None):
    """
    Load CSV data into a pandas DataFrame.

    Return an empty DataFrame if fields and data are not correct, otherwise
    it will interpret it with NaN values.

    Column named :const:`DATETIME_TAG` (i.e. 'Sample Time') is used as index.

    It is common in T4 files to have several columns with a sample time, most
    probably as a product of a horizontal merge of different CSVs. In those
    cases the first column having 'Sample Time' in its name will be used.

    If ``session`` is not a valid SFTP session, work with the local
    file system.

    Arguments:
        data_file (str): Input T4-CSV filename

    Keyword Arguments:
        session (Optional[SFTPClient]): Active SFTP session to a remote host
        logger (Optional[logging.Logger]): logging instance

    Return:
        pandas.DataFrame
    """
    logger = logger or init_logger()
    logger.info('Loading file {0}...'.format(data_file))
    try:
        if not isinstance(session, SFTPClient):
            session = builtins  # open local file
        with session.open(data_file) as file_descriptor:
            _single_df = to_dataframe(*_extract_t4csv(file_descriptor))
        return _single_df
    except IOError:  # non-existing files also return an empty dataframe
        logger.error('File not found: {0}'.format(data_file))
        return pd.DataFrame()
    except ExtractCSVException:
        logger.error('An error occurred while extracting the CSV '
                     'file: {0}'.format(data_file))
        return pd.DataFrame()
    except ToDfError:
        logger.error('Error occurred while processing CSV '
                     'file: {0}'.format(data_file))
        return pd.DataFrame()
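# Usage sketch for dataframize(). The local path is the TEST_CSV fixture used
# elsewhere in this suite; the remote path and paramiko session are
# illustrative assumptions.
df_local = dataframize('test/test_data.csv')

# With an SFTP session (any value failing the SFTPClient isinstance check
# makes the function fall back to the local file system):
# sftp_session = ssh_client.open_sftp()  # an already-connected paramiko.SSHClient
# df_remote = dataframize('/remote/path/data.csv', session=sftp_session)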
def add_methods_to_pandas_dataframe(logger=None):
    """
    Add custom methods to pandas.DataFrame, allowing for example running
    :meth:`t4mon.calculations.apply_calcs` or
    :meth:`t4mon.calculations.clean_calcs` directly from any pandas DataFrame

    Arguments:
        logger (Optional[logging.Logger]): Optional logger object
    """
    pd.DataFrame.oper = calculations.oper
    pd.DataFrame.oper_wrapper = calculations.oper_wrapper
    pd.DataFrame.recursive_lis = calculations.recursive_lis
    pd.DataFrame.apply_calcs = calculations.apply_calcs
    pd.DataFrame.clean_calcs = calculations.clean_calcs
    pd.DataFrame.logger = logger or init_logger()
    pd.DataFrame.select_var = df_tools.select
    pd.DataFrame.plot_var = gen_plot.plot_var
    pd.DataFrame.from_t4csv = __from_t4csv
    pd.DataFrame.from_t4zip = load_zipfile
    pd.DataFrame.to_t4csv = df_tools.dataframe_to_t4csv
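# Usage sketch for the monkey-patched helpers; it assumes from_t4csv takes a
# T4-CSV filename and apply_calcs a calculations file operating in place
# (both signatures are assumptions; the paths are the test fixtures used
# elsewhere in this suite).
import pandas as pd

add_methods_to_pandas_dataframe()
df = pd.DataFrame.from_t4csv('test/test_data.csv')
df.apply_calcs('test/test_calc.cfg')  # assumed to add calculated columns in place
cpu_columns = df.select_var('cpu')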
def __init__(self, logger=None, loglevel=None, noreports=False,
             settings_file=None, safe=False, **kwargs):
    self.data = pd.DataFrame()
    self.date_time = dt.date.strftime(dt.datetime.today(),
                                      "%d/%m/%Y %H:%M:%S")
    self.loglevel = loglevel
    self.logger = logger or init_logger(self.loglevel)
    self.logs = {}
    self.noreports = noreports
    self.reports_folder = None
    self.reports_written = []
    self.settings_file = settings_file or arguments.DEFAULT_SETTINGS_FILE
    self.safe = safe
    self.store_folder = None
    self.systems = None
    self.kwargs = kwargs
    self._set_folders()
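# Usage sketch, assuming the constructor above is Orchestrator.__init__ (as
# the test imports elsewhere in this suite suggest); keyword values are
# illustrative.
from t4mon.orchestrator import Orchestrator

orch = Orchestrator(loglevel='DEBUG', noreports=True,
                    settings_file='test/test_settings.cfg')
print(orch.date_time)  # collection timestamp in '%d/%m/%Y %H:%M:%S' format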
def read_pickle(name, compress=False, logger=None):
    """
    Restore a dataframe plus its metadata from an (optionally deflated)
    pickle store.

    Arguments:
        name (str): Input file name
        compress (Optional[boolean]): Whether or not the file is compressed
            (``True`` if the file extension ends with '.gz').
            Defaults to ``False``.
        logger (Optional[logging.Logger]): Optional logger object

    Return:
        ``Collector``
    """
    if compress or name.endswith('.gz'):
        mode = gzip
    else:
        mode = builtins
    optargs = {'encoding': 'latin1'} if six.PY3 else {}
    with mode.open(name, 'rb') as picklein:
        collector_ = cPickle.load(picklein, **optargs)
    # Keep the logger passed in, or create a fresh one
    collector_.logger = logger or init_logger()
    return collector_
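# Usage sketch for read_pickle(); the filename follows the TEST_PKL fixture
# pattern for Python 3 and is otherwise illustrative. LOGGER is the module
# fixture defined below.
collector = read_pickle('test/test_data3.pkl.gz', logger=LOGGER)
print(collector.systems)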
import sys
import unittest
from os import path

import numpy as np
import pandas as pd

from t4mon import logger
from t4mon.arguments import read_config
from t4mon.collector import (Collector,
                             read_pickle,
                             add_methods_to_pandas_dataframe)
from t4mon.orchestrator import Orchestrator

__all__ = ('BaseTestClass',
           'OrchestratorSandbox',
           'MY_DIR',
           'LOGGER',
           'TEST_CONFIG',
           'BAD_CONFIG',
           'TEST_CSV',
           'TEST_DATAFRAME',
           'TEST_GRAPHS_FILE',
           'TEST_HTMLTEMPLATE',
           'TEST_PKL')

LOGGER = logger.init_logger(loglevel='DEBUG', name='test-t4mon')
TEST_CONFIG = 'test/test_settings.cfg'
MY_DIR = path.dirname(path.abspath(TEST_CONFIG))
BAD_CONFIG = 'test/test_settings_BAD.cfg'
TEST_CALC = 'test/test_calc.cfg'
TEST_CSV = 'test/test_data.csv'
TEST_DATAFRAME = pd.DataFrame(np.random.randn(100, 4),
                              columns=['test1', 'test2', 'test3', 'test4'])
TEST_GRAPHS_FILE = 'test/test_graphs.cfg'
TEST_HTMLTEMPLATE = 'test/test_template.html'
TEST_ZIPFILE = 'test/test_t4.zip'
TEST_PKL = 'test/test_data{}.pkl.gz'.format(sys.version_info[0])


def random_tag(n=5):
def __setstate__(self, state):
    """ Method enabling class pickle """
    state['logger'] = init_logger(name=state.get('loggername'))
    self.__dict__.update(state)