Example 1
    def test_initlogger(self):
        """ Test function for init_logger """
        my_logger = logger.init_logger()
        self.assertEqual(my_logger.level, logging.DEBUG)
        self.assertEqual(my_logger.name, logger.__name__)
        # Even if the loglevel is set to a different value, the logger
        # loglevel stays at 'DEBUG'
        my_logger = logger.init_logger(loglevel=logging.INFO, name='testset')
        self.assertEqual(my_logger.level, logging.DEBUG)
        self.assertEqual('testset', my_logger.name)
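A short usage sketch consistent with what this test asserts; the behavior in the comments is inferred from the assertions (and from the handler test further below), not from the init_logger source:

import logging
from t4mon import logger

my_logger = logger.init_logger(loglevel=logging.INFO, name='demo')
assert my_logger.level == logging.DEBUG  # the logger itself stays at DEBUG
my_logger.debug('low-level detail, captured by the file handler')
my_logger.info('also shown by the console handler')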
Example 2
    def __init__(self,
                 alldays=False,
                 logger=None,
                 loglevel=None,
                 nologs=False,
                 safe=False,
                 settings_file=None,
                 **kwargs):

        self.alldays = alldays
        self.conf = arguments.read_config(settings_file)
        self.data = pd.DataFrame()
        self.filecache = {}
        self.logger = logger or init_logger(loglevel)
        self.logs = {}
        self.nologs = nologs
        self.results_queue = queue.Queue()
        self.safe = safe
        self.settings_file = settings_file or arguments.DEFAULT_SETTINGS_FILE
        self.server = None
        self.systems = [
            item for item in self.conf.sections()
            if item not in ['GATEWAY', 'MISC']
        ]
        add_methods_to_pandas_dataframe(self.logger)
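A minimal instantiation sketch; the class name (Collector, imported from t4mon.collector in the test helpers below) is inferred, and the flag meanings in the comments are assumptions based on the attribute names:

from t4mon.collector import Collector

col = Collector(alldays=True,   # assumed: do not restrict data to today
                nologs=True)    # assumed: skip log collection
print(col.systems)  # config sections other than 'GATEWAY' and 'MISC'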
Example 3
def select(dataframe, *args, **kwargs):
    """
    Get view of selected variables that match columns from the dataframe.

    Arguments:
        dataframe(pandas.DataFrame): Input data
        \*args(List[str]): List of regular expressions selecting column names
    Keyword Arguments:
        filter(str):
            Filter based on the index level and content, only one filter
            allowed. Example: ``system='SYSTEM1'``
        excluded(List[str]):
            Exclusion list, items matching this list (case insensitive) will
            not be selected.
        logger(logging.Logger): Optional logger instance

    Returns:
        ``pandas.DataFrame``
    """
    logger = kwargs.pop('logger', '') or init_logger()
    excluded = kwargs.pop('excluded', None)
    (ix_level, filter_by) = kwargs.popitem() if kwargs else (None, None)
    ix_levels = [level.upper() for level in dataframe.index.names if level]
    if ix_level and ix_level.upper() not in ix_levels:
        logger.warning('Bad filter found: "{0}" not found in index '
                       '(case insensitive)'.format(ix_level))
        return pd.DataFrame()

    if ix_level:
        ix_level = _find_in_iterable_case_insensitive(
            iterable=dataframe.index.names, name=ix_level)
        try:
            if not filter_by:  # fallback if filter_by is not a valid value
                filter_by = dataframe.index.get_level_values(
                    ix_level).unique()[0]
            filter_by = _find_in_iterable_case_insensitive(
                iterable=dataframe.index.get_level_values(ix_level),
                name=filter_by)
            _df = dataframe.xs(
                filter_by, level=ix_level) if filter_by else pd.DataFrame()
        except KeyError:
            logger.warning(
                'Value: "{0}" not found in index level "{1}"!'.format(
                    filter_by, ix_level))
            return pd.DataFrame()
    else:
        _df = dataframe

    # Drop all columns that have all values missing
    _df = _df.dropna(axis=1, how='all')  # reassign: dropna returns a copy

    if len(args) == 0:
        logger.warning(
            'No variables were selected, returning all columns{0}'.format(
                ' for level {0}={1}'.format(ix_level, filter_by
                                            ) if filter_by else ''))
        return _df
    return _df[get_matching_columns(_df, *args, excluded=excluded)]
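A hypothetical, self-contained call on a toy dataframe with a 'system' index level (all names and data below are made up):

import pandas as pd

idx = pd.MultiIndex.from_product([['SYSTEM1', 'SYSTEM2'], range(3)],
                                 names=['system', 'sample'])
df = pd.DataFrame({'cpu_user': 1.0, 'cpu_idle': 2.0, 'memory_used': 3.0},
                  index=idx)
view = select(df, 'cpu.*',
              system='system1',        # index filtering is case-insensitive
              excluded=['cpu_idle'])   # dropped even though 'cpu.*' matches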
Example 4
def plot_var_by_system(dataframe, *var_names, **optional):
    """
    Replace pandas' ``DataFrame.plot()`` to allow plotting different systems in
    the same axis.

    ``var_names`` columns are selected for system in the dataframe
    and ``matplotlib.pyplot``'s plot function is used once for each column.
    """
    logger = optional.pop('logger', '') or init_logger()
    plotaxis = optional.pop('ax', None) or plt.figure().gca()
    cmap = optional.pop('cmap', DFLT_COLORMAP)
    systems = dataframe.index.get_level_values('system').unique()
    for system in systems:
        sel = df_tools.select(dataframe,
                              *var_names,
                              system=system,
                              logger=logger)
        if sel.empty:  # other systems may have this column with some data
            continue
        # # Remove outliers (>3 std away from mean)
        # sel = df_tools.remove_outliers(sel.dropna(), n_std=3)
        for item in sel.columns:
            logger.debug('Drawing item: {0} ({1})'.format(item, system))
            plotaxis = sel[item].plot(label='{0} {1}'.format(item, system),
                                      **optional)
    update_colors(plotaxis, cmap)
    return plotaxis
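A hypothetical call (dataframe and column name are made up); extra keyword arguments such as ``linestyle`` are forwarded to each column's plot call:

axis = plot_var_by_system(df, 'cpu_total', linestyle='--')
axis.legend(loc='best')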
Example 5
def plot_var(dataframe, *args, **kwargs):
    """
    Plot the specified variable names from the dataframe overlaying
    all plots for each variable and silently skipping non-existing variables.

    Outliers (values more than 3 standard deviations away from the mean)
    are removed before plotting.

    Arguments:
        dataframe (pandas.DataFrame): Source of the data to be plotted
        \*args (str):
            Filter column names that match any item in ``\*args``; each
            item can represent a valid regular expression.
    Keyword Arguments:
        system (Optional[str]):
            select which system to filter on (i.e. ``system='localhost'``)
        **kwargs (Optional):
            Keyword parameters passed transparently to pyplot

    Return:
        matplotlib.figure.Figure
    """
    logger = kwargs.pop('logger', '') or init_logger()

    try:
        system_filter = kwargs.pop('system', '')
        assert not dataframe.empty
        # If we filter by system: only first column in var_names will be
        # selected, dataframe.plot() function will be used.
        if system_filter:
            sel = df_tools.select(dataframe,
                                  *args,
                                  system=system_filter,
                                  logger=logger)
            if sel.empty:
                raise TypeError
            # Remove outliers (>3 std away from mean)
            sel = df_tools.remove_outliers(sel.dropna(axis=1, how='all'),
                                           n_std=3)
            plotaxis = sel.plot(**kwargs)
            update_colors(plotaxis, kwargs.get('cmap', DFLT_COLORMAP))
        else:
            plotaxis = plot_var_by_system(dataframe, *args, **kwargs)

        # Style the resulting plot axis and legend
        plotaxis.xaxis.set_major_formatter(md.DateFormatter('%d/%m/%y\n%H:%M'))
        plotaxis.legend(loc='best')
        return plotaxis
    except (TypeError, AssertionError):
        logger.error('{0}{1} not drawn{2}'.format(
            '{0} | '.format(system_filter) if system_filter else '', args,
            ' for this system' if system_filter else ''))
    except Exception as exc:
        _, _, exc_tb = sys.exc_info()
        logger.error('Exception at plot_var (line {0}): {1}'.format(
            exc_tb.tb_lineno, repr(exc)))
    # Return an empty figure if an exception was raised
    return plt.figure().gca()
Example 6
    def __setstate__(self, state):
        """ Method enabling class pickle """
        state['logger'] = init_logger(name=state.get('loggername'))
        if 'loggername' in state:
            del state['loggername']
        state['results_queue'] = queue.Queue()
        state['server'] = None
        self.__dict__.update(state)
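Since this ``__setstate__`` expects a 'loggername' key and rebuilds the queue and the server, a matching ``__getstate__`` would drop those unpicklable members when pickling. The source's version is not shown; this is a plausible sketch:

    def __getstate__(self):
        """ Hypothetical counterpart: drop unpicklable members """
        state = self.__dict__.copy()
        state['loggername'] = self.logger.name   # keep only the logger's name
        for unpicklable in ('logger', 'results_queue', 'server'):
            state.pop(unpicklable, None)
        return state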
Example 7
    def test_init_logger_has_two_handlers(self):
        my_logger = logger.init_logger()
        self.assertEqual(len(my_logger.handlers), 2)
        # Check there's one StreamHandler and one TimedRotatingFileHandler
        self.assertTrue(any([isinstance(_handler, logging.StreamHandler)
                             for _handler in my_logger.handlers]))
        self.assertTrue(any([isinstance(_handler,
                                        handlers.TimedRotatingFileHandler)
                             for _handler in my_logger.handlers]))

        # Check that the file handler's loglevel matches the logger's level
        # and that the console handler's loglevel is the default one
        for _handler in my_logger.handlers:
            if isinstance(_handler, handlers.TimedRotatingFileHandler):
                self.assertEqual(_handler.level, my_logger.level)
            else:
                self.assertEqual(_handler.level,
                                 logger.DEFAULT_LOGLEVEL)
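A minimal ``init_logger`` sketch consistent with both tests above (two handlers, the logger pinned at DEBUG, the console handler at the requested level, the file handler following the logger); the log file name and rotation policy are assumptions:

import logging
from logging import handlers

DEFAULT_LOGLEVEL = logging.WARNING  # placeholder value, assumed

def init_logger(loglevel=None, name=__name__):
    my_logger = logging.getLogger(name)
    my_logger.setLevel(logging.DEBUG)   # the logger itself stays at DEBUG
    console = logging.StreamHandler()
    console.setLevel(loglevel or DEFAULT_LOGLEVEL)
    logfile = handlers.TimedRotatingFileHandler('t4mon.log', when='midnight')
    logfile.setLevel(my_logger.level)   # file handler follows the logger
    my_logger.addHandler(console)
    my_logger.addHandler(logfile)
    return my_logger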
Example 8
    def __init__(self, container, system, logger=None):
        self.system = system
        # Transparently pass all container items
        for item in container.__dict__:
            setattr(self, item, getattr(container, item))
        if 'loglevel' not in self.__dict__:
            # NOTE: the ``logger`` argument shadows the t4mon.logger module,
            # so the module is referenced explicitly for the default level
            from t4mon import logger as logger_module
            self.loglevel = logger_module.DEFAULT_LOGLEVEL
        self.logger = logger or init_logger(self.loglevel)
        current_date = dt.datetime.strptime(self.date_time,
                                            "%d/%m/%Y %H:%M:%S")
        self.year = current_date.year
        # populate self.html_template and self.graphs_definition_file
        conf = arguments.read_config(self.settings_file)
        for item in ['html_template', 'graphs_definition_file']:
            setattr(self,
                    item,
                    arguments.get_absolute_path(conf.get('MISC', item),
                                                self.settings_file))
Example 9
def dataframize(data_file, session=None, logger=None):
    """
    Load CSV data into a pandas DataFrame.

    Return an empty DataFrame if the fields and data are not consistent;
    otherwise missing values are interpreted as NaN.

    Column named :const:`DATETIME_TAG` (i.e. 'Sample Time') is used as index.
    It is common in T4 files to have several columns with a sample time, most
    probably as a product of a horizontal merge of different CSVs. In those
    cases the first column having 'Sample Time' in its name will be used.

    If ``session`` is not a valid SFTP session, work with the local file
    system.

    Arguments:
        data_file (str): Input T4-CSV filename
    Keyword Arguments:
        session (Optional[SFTPClient]): Active SFTP session to a remote host
        logger (Optional[logging.Logger]): logging instance
    Return:
        pandas.DataFrame
    """

    logger = logger or init_logger()
    logger.info('Loading file {0}...'.format(data_file))
    try:
        if not isinstance(session, SFTPClient):
            session = builtins  # open local file
        with session.open(data_file) as file_descriptor:
            _single_df = to_dataframe(*_extract_t4csv(file_descriptor))
        return _single_df
    except IOError:  # non-existing files also return an empty dataframe
        logger.error('File not found: {0}'.format(data_file))
        return pd.DataFrame()
    except ExtractCSVException:
        logger.error(
            'An error occurred while extracting the CSV file: {0}'.format(
                data_file))
        return pd.DataFrame()
    except ToDfError:
        logger.error(
            'Error occurred while processing CSV file: {0}'.format(data_file))
        return pd.DataFrame()
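Hypothetical calls for both supported sources; file names are placeholders, and ``sftp_session`` stands for an already-open SFTPClient (per the ``isinstance`` check above):

df = dataframize('t4sample.csv')                                 # local file
df = dataframize('/remote/t4sample.csv', session=sftp_session)   # over SFTP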
Example 10
def add_methods_to_pandas_dataframe(logger=None):
    """
    Add custom methods to pandas.DataFrame, allowing, for example, running
    :meth:`t4mon.calculations.apply_calcs` or
    :meth:`t4mon.calculations.clean_calcs` directly on any pandas DataFrame.

    Arguments:
        logger (Optional[logging.Logger]): Optional logger object
    """
    pd.DataFrame.oper = calculations.oper
    pd.DataFrame.oper_wrapper = calculations.oper_wrapper
    pd.DataFrame.recursive_lis = calculations.recursive_lis
    pd.DataFrame.apply_calcs = calculations.apply_calcs
    pd.DataFrame.clean_calcs = calculations.clean_calcs
    pd.DataFrame.logger = logger or init_logger()
    pd.DataFrame.select_var = df_tools.select
    pd.DataFrame.plot_var = gen_plot.plot_var
    pd.DataFrame.from_t4csv = __from_t4csv
    pd.DataFrame.from_t4zip = load_zipfile
    pd.DataFrame.to_t4csv = df_tools.dataframe_to_t4csv
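After this monkey-patching, the helpers hang off every DataFrame; a hypothetical session (the file name is made up, and ``from_t4csv`` is assumed to take a file name):

add_methods_to_pandas_dataframe()
df = pd.DataFrame.from_t4csv('input_t4.csv')
view = df.select_var('cpu.*', system='SYSTEM1')
view.plot_var('cpu_total')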
Example 11
    def __init__(self,
                 logger=None,
                 loglevel=None,
                 noreports=False,
                 settings_file=None,
                 safe=False,
                 **kwargs):
        self.data = pd.DataFrame()
        self.date_time = dt.date.strftime(dt.datetime.today(),
                                          "%d/%m/%Y %H:%M:%S")
        self.loglevel = loglevel
        self.logger = logger or init_logger(self.loglevel)
        self.logs = {}
        self.noreports = noreports
        self.reports_folder = None
        self.reports_written = []
        self.settings_file = settings_file or arguments.DEFAULT_SETTINGS_FILE
        self.safe = safe
        self.store_folder = None
        self.systems = None
        self.kwargs = kwargs

        self._set_folders()
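Judging by the import in the test helpers below, this constructor belongs to Orchestrator; a minimal instantiation sketch (the comment's reading of ``noreports`` is inferred from the flag name):

import logging
from t4mon.orchestrator import Orchestrator

orchestrator = Orchestrator(loglevel=logging.INFO,
                            noreports=True)  # collect data, skip reports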
Example 12
def read_pickle(name, compress=False, logger=None):
    """
    Restore dataframe plus its metadata from (optionally deflated) pickle store

    Arguments:
        name(str): Input file name
        compress (Optional[bool]):
            Whether the file is gzip-compressed. Files whose name ends with
            '.gz' are treated as compressed regardless. Defaults to ``False``.
        logger (Optional[logging.Logger]): Optional logger object
    Return:
        ``Collector``
    """
    if compress or name.endswith('.gz'):
        mode = gzip
    else:
        mode = builtins
    optargs = {'encoding': 'latin1'} if six.PY3 else {}
    with mode.open(name, 'rb') as picklein:
        collector_ = cPickle.load(picklein, **optargs)
    collector_.logger = logger or init_logger()
    return collector_
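Hypothetical usage with placeholder file names; compression is inferred from the '.gz' suffix, so ``compress`` only matters for compressed files with other extensions:

collector = read_pickle('data.pkl.gz')              # gzip inferred from name
collector = read_pickle('data.pkl', compress=True)  # force gzip explicitly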
Example 13
import random
import string
import sys
import unittest
from os import path

import numpy as np
import pandas as pd
from t4mon import logger
from t4mon.arguments import read_config
from t4mon.collector import (Collector, read_pickle,
                             add_methods_to_pandas_dataframe)
from t4mon.orchestrator import Orchestrator

__all__ = ('BaseTestClass', 'OrchestratorSandbox', 'MY_DIR', 'LOGGER',
           'TEST_CONFIG', 'BAD_CONFIG', 'TEST_CSV', 'TEST_DATAFRAME',
           'TEST_GRAPHS_FILE', 'TEST_HTMLTEMPLATE', 'TEST_PKL')

LOGGER = logger.init_logger(loglevel='DEBUG', name='test-t4mon')

TEST_CONFIG = 'test/test_settings.cfg'
MY_DIR = path.dirname(path.abspath(TEST_CONFIG))
BAD_CONFIG = 'test/test_settings_BAD.cfg'
TEST_CALC = 'test/test_calc.cfg'
TEST_CSV = 'test/test_data.csv'
TEST_DATAFRAME = pd.DataFrame(np.random.randn(100, 4),
                              columns=['test1', 'test2', 'test3', 'test4'])
TEST_GRAPHS_FILE = 'test/test_graphs.cfg'
TEST_HTMLTEMPLATE = 'test/test_template.html'
TEST_ZIPFILE = 'test/test_t4.zip'
TEST_PKL = 'test/test_data{}.pkl.gz'.format(sys.version_info[0])


def random_tag(n=5):
    # The body is truncated in the source; a plausible implementation
    # (assumed) returning a random tag of ``n`` lowercase letters:
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(n))
Esempio n. 22
0
)
from t4mon.orchestrator import Orchestrator

__all__ = ('BaseTestClass',
           'OrchestratorSandbox',
           'MY_DIR',
           'LOGGER',
           'TEST_CONFIG',
           'BAD_CONFIG',
           'TEST_CSV',
           'TEST_DATAFRAME',
           'TEST_GRAPHS_FILE',
           'TEST_HTMLTEMPLATE',
           'TEST_PKL')

LOGGER = logger.init_logger(loglevel='DEBUG', name='test-t4mon')

TEST_CONFIG = 'test/test_settings.cfg'
MY_DIR = path.dirname(path.abspath(TEST_CONFIG))
BAD_CONFIG = 'test/test_settings_BAD.cfg'
TEST_CALC = 'test/test_calc.cfg'
TEST_CSV = 'test/test_data.csv'
TEST_DATAFRAME = pd.DataFrame(np.random.randn(100, 4),
                              columns=['test1',
                                       'test2',
                                       'test3',
                                       'test4'])
TEST_GRAPHS_FILE = 'test/test_graphs.cfg'
TEST_HTMLTEMPLATE = 'test/test_template.html'
TEST_ZIPFILE = 'test/test_t4.zip'
TEST_PKL = 'test/test_data{}.pkl.gz'.format(sys.version_info[0])
Example 14
    def __setstate__(self, state):
        """ Method enabling class pickle """
        state['logger'] = init_logger(name=state.get('loggername'))
        self.__dict__.update(state)