def __init__(self, app, store, memory=10):
    # type: (dash.Dash, dict, int) -> None
    '''
    Constructs EventListener.

    A deep copy of the given store becomes the first entry of the state
    history, which holds at most `memory` entries.

    Args:
        app (dash.Dash): Dash application instance.
        store (dict): Dash store.
        memory (int, optional): Number of state changes to remember.
            Default: 10.

    Raises:
        EnforceError: If app is not an instance of dash.Dash.
        EnforceError: If store is not an instance of dict.
        EnforceError: If memory is less than 1.
    '''
    Enforce(app, 'instance of', dash.Dash)
    Enforce(store, 'instance of', dict)
    Enforce(
        memory, '>=', 1,
        message='Memory must be greater or equal to {b}. {a} < {b}.',
    )
    # ----------------------------------------------------------------------

    # bounded history, oldest entries are dropped once memory is exceeded
    history = deque(maxlen=memory)
    history.append(deepcopy(store))

    self._app = app  # type: dash.Dash
    self.events = dict()  # type: dict
    self.state = history  # type: deque
def filter_data(data, column, comparator, value):
    # type: (DataFrame, str, str, Any) -> DataFrame
    '''
    Filters given data via comparator(column value, value).

    In the lambdas below, a is a value from the column and b is the given
    value.

    Legal comparators:

        * == ``lambda a, b: a == b``
        * != ``lambda a, b: a != b``
        * >  ``lambda a, b: a > b``
        * >= ``lambda a, b: a >= b``
        * <  ``lambda a, b: a < b``
        * <= ``lambda a, b: a <= b``
        * ~  ``lambda a, b: bool(re.search(b, a, flags=re.I))``
        * !~ ``lambda a, b: not bool(re.search(b, a, flags=re.I))``

    Args:
        data (DataFrame): DataFrame to be filtered.
        column (str): Column name.
        comparator (str): String representation of comparator.
        value (object): Value to be compared. Used as the regular expression
            pattern when comparator is ~ or !~.

    Raises:
        EnforceError: If data is not a DataFrame.
        EnforceError: If column is not a string.
        EnforceError: If column not in data columns.
        EnforceError: If illegal comparator given.
        EnforceError: If comparator is ~ or !~ and value is not a string.

    Returns:
        DataFrame: Filtered data.
    '''
    Enforce(data, 'instance of', DataFrame)
    Enforce(
        column, 'instance of', str,
        message='Column must be a str. {a} is not str.',
    )
    eft.enforce_columns_in_dataframe([column], data)

    def regex_match(cell, pattern):
        # case insensitive search of pattern within the cell
        return bool(re.search(pattern, cell, flags=re.I))

    def regex_miss(cell, pattern):
        return not bool(re.search(pattern, cell, flags=re.I))

    lut = {
        '==': lambda a, b: a == b,
        '!=': lambda a, b: a != b,
        '>': lambda a, b: a > b,
        '>=': lambda a, b: a >= b,
        '<': lambda a, b: a < b,
        '<=': lambda a, b: a <= b,
        '~': regex_match,
        '!~': regex_miss,
    }
    Enforce(
        comparator, 'in', lut.keys(),
        message='Illegal comparator. {a} not in [==, !=, >, >=, <, <=, ~, !~].',
    )

    if comparator in ['~', '!~']:
        Enforce(
            value, 'instance of', str,
            message='Value must be string if comparator is ~ or !~. {a} is not str.',
        )
    # --------------------------------------------------------------------------

    compare = lut[comparator]

    def keep(cell):
        return compare(cell, value)

    mask = data[column].apply(keep)
    return data[mask]
def test_init_instance_of(self):
    '''
    Enforce with 'instance of' passes for matching types and raises
    EnforceError with the expected message when no type matches.
    '''
    Food = self.Food
    Taco = self.Taco

    # passing cases
    Enforce(1, 'instance of', int)
    Enforce(Taco(), 'instance of', Food)

    # failing case
    # assertRaisesRegexp was a deprecated alias that Python 3.12 removed,
    # assertRaisesRegex is the supported spelling
    expected = r'1 is not instance of \[.*str.*, .*float.*\]\.'
    with self.assertRaisesRegex(EnforceError, expected):
        Enforce(1, 'instance of', [str, float])
def test_init_not_instance_of(self):
    '''
    Enforce with 'not instance of' passes for non-matching types and raises
    EnforceError with the expected message when a type matches.
    '''
    Food = self.Food
    Taco = self.Taco

    # passing cases
    Enforce(1, 'not instance of', float)
    Enforce(Taco(), 'not instance of', Exception)

    # failing cases
    # assertRaisesRegexp was a deprecated alias that Python 3.12 removed,
    # assertRaisesRegex is the supported spelling
    expected = r'1 is instance of \[.*int.*, .*float.*\]\.'
    with self.assertRaisesRegex(EnforceError, expected):
        Enforce(1, 'not instance of', [int, float])

    expected = r'.*Taco.* is instance of \[.*Food.*, .*float.*\]\.'
    with self.assertRaisesRegex(EnforceError, expected):
        Enforce(Taco(), 'not instance of', [Food, float])
def __init__(
    self, message='', name='LogRuntime', level='info', suppress=False
):
    # type: (str, str, Union[str, int], bool) -> None
    '''
    Constructs a LogRuntime instance.

    Args:
        message (str, optional): Logging message. Default: ''.
        name (str, optional): Name of logger. Default: 'LogRuntime'.
        level (str or int, optional): Log level, either a level name
            (case insensitive) or the matching logging module integer.
            Default: info.
        suppress (bool, optional): Whether to suppress logging.
            Default: False.

    Raises:
        EnforceError: If message is not a string.
        EnforceError: If name is not a string.
        EnforceError: If level is not legal logging level.
        EnforceError: If suppress is not a boolean.
    '''
    Enforce(message, 'instance of', str)
    Enforce(name, 'instance of', str)
    Enforce(suppress, 'instance of', bool)

    # map legal level names to their logging module integer values
    keys = ['debug', 'info', 'warn', 'error', 'critical', 'fatal']
    values = [getattr(logging, x.upper()) for x in keys]
    lut = dict(zip(keys, values))  # type: Dict[str, int]

    msg = 'Log level must be an integer or string. Given value: {a}. '
    lut_msg = ', '.join([f'{k}: {v}' for k, v in zip(keys, values)])
    msg += f'Legal values: [{lut_msg}].'

    level_ = 0
    if isinstance(level, int):
        Enforce(level, 'in', values, message=msg)
        # keep the validated integer, previously it was discarded and the
        # stored level was always 0
        level_ = level

    elif isinstance(level, str):
        level = level.lower()
        Enforce(level, 'in', keys, message=msg)
        level_ = lut[level]

    else:
        raise EnforceError(msg.format(a=level))
    # ----------------------------------------------------------------------

    self._message = message
    self._stopwatch = StopWatch()
    self._logger = logging.getLogger(name)
    self._level = level_
    self._suppress = suppress
def enforce_dataframes_are_equal(a, b):
    '''
    Ensures that DataFrames a and b have equal contents.

    Checks, in order: column names, shape, per column values (with NaNs
    replaced by a placeholder string) and finally the record dicts.

    Args:
        a (DataFrame): DataFrame A.
        b (DataFrame): DataFrame B.

    Raises:
        EnforceError: If a and b are not equal.
    '''
    # column names
    diff = set(a.columns.tolist()).symmetric_difference(b.columns.tolist())
    diff = sorted(diff)
    Enforce(
        len(diff), '==', 0,
        message=f'A and b have different columns: {diff}.',
    )

    # shape
    Enforce(
        a.shape, '==', b.shape,
        message='A and b have different shapes. {a} != {b}.',
    )

    # NaN != NaN, so swap NaNs for a comparable placeholder
    placeholder = '---NAN---'
    a = a.fillna(placeholder)
    b = b.fillna(placeholder)

    # values
    errors = []
    for col in a.columns:
        differs = a[col] != b[col]
        left = a.loc[differs, col].tolist()
        if not left:
            continue
        right = b.loc[differs, col].tolist()
        errors.extend([col, lv, rv] for lv, rv in zip(left, right))

    if errors:
        table = DataFrame(errors, columns=['column', 'a', 'b']).to_string()
        raise EnforceError(f'DatFrames have different values:\n{table}')

    # records
    Enforce(a.to_dict(orient='records'), '==', b.to_dict(orient='records'))
def test_instance_of(self):
    '''
    Enforce.instance_of accepts a single type, a list of types or a tuple of
    types.
    '''
    enforcer = Enforce(1, '==', 1)

    # single type
    self.assertTrue(enforcer.instance_of(1, int))
    self.assertFalse(enforcer.instance_of(1, str))

    # list of types
    self.assertTrue(enforcer.instance_of(1, [int, float]))
    self.assertFalse(enforcer.instance_of(1, [str, dict]))

    # tuple of types
    self.assertTrue(enforcer.instance_of(1, (int, float)))
    self.assertFalse(enforcer.instance_of(1, (str, dict)))
def test_init_not_in(self):
    '''
    Enforce with 'not in' passes when the item is absent and raises
    EnforceError with the expected message when it is present, both for
    plain values and for attribute comparisons.
    '''
    Foo = self.Foo
    Bar = self.Bar
    EnforceFooBar = self.EnforceFooBar

    # regular
    Enforce('foo', 'not in', ['taco', 'bar'])

    # assertRaisesRegexp was a deprecated alias that Python 3.12 removed,
    # assertRaisesRegex is the supported spelling
    expected = r"foo is in \['foo', 'bar'\]\."
    with self.assertRaisesRegex(EnforceError, expected):
        Enforce('foo', 'not in', ['foo', 'bar'])

    # attribute
    EnforceFooBar(Foo(1), 'not in', [Bar(2), Foo(2)], 'value')

    expected = r"value of <Foo> is in value of \[<Bar>, <Bar>\]\."
    with self.assertRaisesRegex(EnforceError, expected):
        EnforceFooBar(Foo(1), 'not in', [Bar(1), Bar(3)], 'value')
def post_to_slack(url, channel, message):
    # type (str, str, str) -> http.client.HTTPResponse
    # (informational only, not a checked type comment)
    '''
    Post a given message to a given slack channel.

    The message is sent as a JSON body via HTTP POST, with "#" prepended to
    the channel name.

    Args:
        url (str): https://hooks.slack.com/services URL.
        channel (str): Channel name.
        message (str): Message to be posted.

    Raises:
        EnforceError: If URL is not a string.
        EnforceError: If URL does not start with
            https://hooks.slack.com/services/
        EnforceError: If channel is not a string.
        EnforceError: If message is not a string.

    Returns:
        HTTPResponse: Response.
    '''
    for arg in (url, channel, message):
        Enforce(arg, 'instance of', str)

    msg = (
        'URL must begin with https://hooks.slack.com/services/. '
        f'Given URL: {url}'
    )
    is_slack_hook = url.startswith('https://hooks.slack.com/services/')
    Enforce(is_slack_hook, '==', True, message=msg)
    # --------------------------------------------------------------------------

    payload = json.dumps({
        'channel': '#' + channel,
        'text': message,
    })
    request = urllib.request.Request(
        url,
        method='POST',
        headers={'Content-type': 'application/json'},
        data=payload.encode(),
    )
    return urllib.request.urlopen(request)
def truncate_list(items, size=3):
    # type: (list, int) -> list
    '''
    Truncates a given list to a given size, replaces the middle contents with
    "...".

    Lists that already fit within size are returned as is. For sizes below 3
    there is no room for the "..." marker, so only leading and/or trailing
    items are kept.

    Args:
        items (list): List of objects.
        size (int, optional): Size of output list. Default: 3.

    Raises:
        EnforceError: If item is not a list.
        EnforceError: If size is not an integer greater than -1.

    Returns:
        list: List of given size.
    '''
    Enforce(items, 'instance of', list, message='Items must be a list.')
    size_msg = 'Size must be an integer greater than -1. Given value: {a}.'
    Enforce(size, 'instance of', int, message=size_msg)
    Enforce(size, '>', -1, message=size_msg)
    # --------------------------------------------------------------------------

    # nothing to truncate
    if len(items) <= size:
        return items

    last = items[-1]
    if size == 0:
        return []
    elif size == 1:
        return items[:1]
    elif size == 2:
        return [items[0], last]

    # head + marker + last item == size elements
    return items[:size - 2] + ['...', last]
def enforce_columns_in_dataframe(columns, data):
    # type: (List[str], DataFrame) -> None
    '''
    Ensure all given columns are in given dataframe columns.

    Args:
        columns (list[str]): Column names.
        data (DataFrame): DataFrame.

    Raises:
        EnforceError: If any column not found in data.columns.
    '''
    cols = data.columns.tolist()

    # sorted names of the given columns that are missing from data
    diff = sorted(set(columns) - set(cols))  # type: Any

    Enforce(
        diff, '==', [],
        message=f'Given columns not found in data. {diff} not in {cols}.',
    )
def test_get_message_skip(self):
    '''
    _get_message returns the short message template, with and without an
    attribute, for the in, not in, instance of and not instance of
    comparators.
    '''
    enforcer = Enforce(1, '==', 1)
    comparators = [
        Comparator.IN,
        Comparator.NOT_IN,
        Comparator.INSTANCE_OF,
        Comparator.NOT_INSTANCE_OF,
    ]

    # (attribute, expected template)
    cases = [
        (None, '{a} is {comparator.message} {b}.'),
        (
            'foo',
            '{attribute} of {a} is {comparator.message} {attribute} of {b}.',
        ),
    ]
    for attribute, expected in cases:
        for comparator in comparators:
            result = enforcer._get_message(attribute, comparator)
            self.assertEqual(result, expected)
def listen(self, event, callback):
    # type: (str, Callable[[Any, dict, dash.Dash], dict]) -> EventListener
    '''
    Listen for given event and call given callback.

    Registering a callback under an existing event name replaces the
    previous callback.

    Args:
        event (str): Event name.
        callback (function): Function of form (value, store, app) -> store.

    Raises:
        EnforceError: If event is not a string.

    Returns:
        EventListener: self.
    '''
    Enforce(
        event, 'instance of', str,
        message='Event name must be a string. {a} is not a string.',
    )
    self.events.update({event: callback})
    return self
def emit(self, event, value):
    # type: (str, object) -> EventListener
    '''
    Call a registered callback given an event and value.

    The callback is called with (value, self.store, app) and a deep copy of
    the store it returns is appended to the state history. Events without a
    registered callback are ignored.

    Args:
        event (str): Event name.
        value (object): Value to be given to callback.

    Raises:
        EnforceError: If event is not a string.

    Returns:
        EventListener: self.
    '''
    Enforce(
        event, 'instance of', str,
        message='Event name must be a string. {a} is not a string.',
    )

    # unregistered events are a no-op
    if event not in self.events:
        return self

    callback = self.events[event]
    new_store = callback(value, self.store, self._app)
    self.state.append(deepcopy(new_store))
    return self
def truncate_blob_lists(blob, size=3):
    # type: (dict, int) -> dict
    '''
    Truncates lists inside given JSON blob to a given size.

    Every list found at any depth is passed through truncate_list and then
    its remaining items are processed in turn. A new blob is returned.

    Args:
        blob (dict): Blob to be truncated.
        size (int, optional): Size of lists. Default 3.

    Raises:
        EnforceError: If blob is not a dict.

    Returns:
        dict: Truncated blob.
    '''
    Enforce(blob, 'instance of', dict, message='Blob must be a dict.')
    # --------------------------------------------------------------------------

    def walk(node):
        # dicts are rebuilt key by key
        if isinstance(node, dict):
            return {key: walk(val) for key, val in node.items()}

        # lists are truncated first, then their survivors are walked
        if isinstance(node, list):
            return [walk(elem) for elem in truncate_list(node, size=size)]

        # everything else passes through untouched
        return node

    return walk(blob)
def test_get_message_attribute(self):
    '''
    _get_message returns the attribute message template for the EQ
    comparator when an attribute is given.
    '''
    enforcer = Enforce(1, '==', 1)
    expected = (
        '{attribute} of {a} is {comparator.message} {attribute} of '
        '{b}. {a_val} {comparator.negation_symbol} {b_val}.'
    )
    self.assertEqual(enforcer._get_message('foo', Comparator.EQ), expected)
def get_plots(data, plots):
    # type: (List[dict], List[dict]) -> List[dcc.Graph]
    '''
    Gets a Dash plots using given dicts.
    Assumes dict element has all columns of table as keys.

    Each plot dict is validated with cfg.PlotItem and drawn with
    sdt.get_figure. If drawing raises DataError or EnforceError, an html.Div
    with a "no data found" message takes the place of the graph.

    Args:
        data (list[dict]): List of dicts defining data.
        plots (list[dict]): List of dicts defining plots.

    Raises:
        EnforceError: If data is not a list of dicts.
        EnforceError: If plots is not a list of dicts.

    Returns:
        list[dcc.Graph]: Plots, one element per plot dict.
    '''
    checks = [
        (data, 'Data must be a list of dictionaries. Given value: {a}.'),
        (plots, 'Plots must be a list of dictionaries. Given value: {a}.'),
    ]
    for items, msg in checks:
        Enforce(items, 'instance of', list, message=msg)
        for item in items:
            Enforce(item, 'instance of', dict, message=msg)
    # --------------------------------------------------------------------------

    frame = DataFrame(data)
    if 'date' in frame.columns:
        frame['date'] = DatetimeIndex(frame['date'])

    elems = []
    for i, spec in enumerate(plots):
        item = cfg.PlotItem(spec)
        item.validate()
        plot = item.to_primitive()

        elem_id = f'plot-{i:02d}'
        style = {'min-width': str(plot['min_width']) + '%'}

        try:
            figure = sdt.get_figure(
                frame,
                filters=plot['filters'],
                group=plot['group'],
                pivot=plot['pivot'],
                **plot['figure'],
            )
            elem = dcc.Graph(
                id=elem_id,
                className='plot',
                figure=figure,
                style=style,
            )
        except (DataError, EnforceError):
            # placeholder shown in place of a plot that could not be drawn
            message = html.Div(
                className='plot-error-message',
                children='no data found'
            )
            container = html.Div(
                className='plot-error-container',
                children=message,
            )
            elem = html.Div(
                id=elem_id,
                className='plot plot-error',
                style=style,
                children=container,
            )
        elems.append(elem)
    return elems
def test_gt(self):
    '''
    Enforce.gt is True only when the first value is greater than the second.
    '''
    enforcer = Enforce(1, '==', 1)
    self.assertTrue(enforcer.gt(2, 1))

    # equal and lesser first values both fail
    for first, second in [(1, 1), (1, 2)]:
        self.assertFalse(enforcer.gt(first, second))
def test_get_type_name(self):
    '''
    Enforce.get_type_name returns the class name of a given instance.
    '''
    enforcer = Enforce(1, '==', 1)
    Taco = self.Taco

    result = enforcer.get_type_name(Taco())
    self.assertEqual(result, Taco().__class__.__name__)
def test_get_message_not_similar(self):
    '''
    _get_message returns the delta/epsilon message template for the
    NOT_SIMILAR comparator when an attribute is given.
    '''
    enforcer = Enforce(1, '==', 1)
    expected = (
        '{attribute} of {a} is {comparator.message} {attribute} of '
        '{b}. Delta {delta} is not greater than epsilon {epsilon}.'
    )
    result = enforcer._get_message('foo', Comparator.NOT_SIMILAR)
    self.assertEqual(result, expected)
def test_in(self):
    '''
    Enforce.in_ is True when the item is in the list and False otherwise.
    '''
    enforcer = Enforce(1, '==', 1)

    present = enforcer.in_(1, [1, 2])
    self.assertTrue(present)

    absent = enforcer.in_(1, [2, 3])
    self.assertFalse(absent)
def group_data(data, columns, metric, datetime_column='date'):
    # type: (DataFrame, Union[str, List[str]], str, str) -> DataFrame
    '''
    Groups given data by given columns according to given metric.
    If a legal time interval is given in the columns, then an additional special
    column of that same name is added to the data for grouping.

    Note that the time interval columns are added to the given DataFrame
    itself, not to a copy.

    Legal metrics:

        * max   ``lambda x: x.max()``
        * mean  ``lambda x: x.mean()``
        * min   ``lambda x: x.min()``
        * std   ``lambda x: x.std()``
        * sum   ``lambda x: x.sum()``
        * var   ``lambda x: x.var()``
        * count ``lambda x: x.count()``

    Legal time intervals:

        * year
        * quarter
        * month
        * two_week
        * week
        * day
        * hour
        * half_hour
        * quarter_hour
        * minute
        * second
        * microsecond

    Args:
        data (DataFrame): DataFrame to be grouped.
        columns (str or list[str]): Columns to group data by.
        metric (str): String representation of metric.
        datetime_column (str, optional): Datetime column for time grouping.
            Default: date.

    Raises:
        EnforceError: If data is not a DataFrame.
        EnforceError: If columns not in data columns.
        EnforceError: If illegal metric given.
        EnforceError: If time interval in columns and datetime_column not in
            columns.
        EnforceError: If time interval in columns and datetime_column is not
            of type datetime64.

    Returns:
        DataFrame: Grouped data.
    '''
    # luts
    met_lut = {
        'max': lambda x: x.max(),
        'mean': lambda x: x.mean(),
        'min': lambda x: x.min(),
        'std': lambda x: x.std(),
        'sum': lambda x: x.sum(),
        'var': lambda x: x.var(),
        'count': lambda x: x.count(),
    }

    # each function floors a datetime to the start of its interval
    time_lut = {
        'year': lambda x: dt.datetime(x.year, 1, 1),
        # first month of the quarter: 1, 4, 7 or 10
        'quarter': lambda x: dt.datetime(
            x.year, int(np.ceil(x.month / 3) * 3 - 2), 1
        ),
        'month': lambda x: dt.datetime(x.year, x.month, 1),
        # days 1-14 -> 1, 15-28 -> 15, 29-31 -> 28
        'two_week': lambda x: dt.datetime(
            x.year, x.month, min(int(np.ceil(x.day / 14) * 14 - 13), 28)
        ),
        # days 1-6 -> 1, 7-13 -> 7, 14-20 -> 14, 21-27 -> 21, 28-31 -> 28
        # bucket on the day of month, the month number was used here before
        # which collapsed every date onto day 1 or 7
        'week': lambda x: dt.datetime(
            x.year, x.month, max(1, min([int(x.day / 7) * 7, 28]))
        ),
        'day': lambda x: dt.datetime(x.year, x.month, x.day),
        'hour': lambda x: dt.datetime(x.year, x.month, x.day, x.hour),
        'half_hour': lambda x: dt.datetime(
            x.year, x.month, x.day, x.hour, int(x.minute / 30) * 30
        ),
        'quarter_hour': lambda x: dt.datetime(
            x.year, x.month, x.day, x.hour, int(x.minute / 15) * 15
        ),
        'minute': lambda x: dt.datetime(
            x.year, x.month, x.day, x.hour, x.minute
        ),
        'second': lambda x: dt.datetime(
            x.year, x.month, x.day, x.hour, x.minute, x.second
        ),
        'microsecond': lambda x: dt.datetime(
            x.year, x.month, x.day, x.hour, x.minute, x.second, x.microsecond
        ),
    }
    # --------------------------------------------------------------------------

    # enforcements
    Enforce(data, 'instance of', DataFrame)

    columns_ = columns  # type: Any
    if not isinstance(columns_, list):
        columns_ = [columns_]

    # time intervals are generated below, so only the rest must exist in data
    cols = list(filter(lambda x: x not in time_lut.keys(), columns_))
    eft.enforce_columns_in_dataframe(cols, data)

    msg = '{a} is not a legal metric. Legal metrics: {b}.'
    Enforce(metric, 'in', sorted(list(met_lut.keys())), message=msg)

    # time column
    if len(columns_) > len(cols):
        eft.enforce_columns_in_dataframe([datetime_column], data)
        msg = 'Datetime column of type {a}, it must be of type {b}.'
        Enforce(
            data[datetime_column].dtype.type, '==', np.datetime64, message=msg
        )
    # --------------------------------------------------------------------------

    # add a column of floored datetimes for each time interval
    for col in columns_:
        if col in time_lut.keys():
            op = time_lut[col]
            data[col] = data[datetime_column].apply(op)

    agg = met_lut[metric]
    cols = data.columns.tolist()
    grp = data.groupby(columns_, as_index=False)
    output = agg(grp)

    # get first value for columns that cannot be computed by given metric
    # iterate in original column order so the output layout is deterministic
    found = set(output.columns.tolist())
    missing = [col for col in cols if col not in found]
    if len(missing) > 0:
        first = grp.first()
        for col in missing:
            output[col] = first[col]
    return output
def test_similar(self):
    '''
    Enforce.similar with epsilon 1 is True for 0.9 and False for 1.0 and
    1.1.
    '''
    enforcer = Enforce(1, '==', 1)
    self.assertTrue(enforcer.similar(0.9, epsilon=1))

    # values equal to or above epsilon are not similar
    for difference in (1.0, 1.1):
        self.assertFalse(enforcer.similar(difference, epsilon=1))
def test_lte(self):
    '''
    Enforce.lte is True when the first value is less than or equal to the
    second.
    '''
    enforcer = Enforce(1, '==', 1)

    # lesser and equal first values both pass
    for first, second in [(1, 2), (1, 1)]:
        self.assertTrue(enforcer.lte(first, second))

    self.assertFalse(enforcer.lte(2, 1))
def pivot_data(data, columns, values=[], index=None):
    # type: (DataFrame, List[str], List[str], Optional[str]) -> DataFrame
    '''
    Pivots a given dataframe via a list of columns.

    If index is a legal time column, a random offset of 0 to 999999
    microseconds is added to each of its values in the given DataFrame before
    pivoting (presumably so that index values are unique, confirm with the
    author).

    Legal time columns:

        * date
        * year
        * quarter
        * month
        * two_week
        * week
        * day
        * hour
        * half_hour
        * quarter_hour
        * minute
        * second
        * microsecond

    Args:
        data (DataFrame): DataFrame to be pivoted.
        columns (list[str]): Columns whose unique values become separate traces
            within a plot.
        values (list[str], optional): Columns whose values become the values
            within each trace of a plot. Default: [].
        index (str, optional): Column whose values become the y axis values of a
            plot. Default: None.

    Raises:
        EnforceError: If data is not a DataFrame.
        EnforceError: If data is of zero length.
        EnforceError: If columns not in data columns.
        EnforceError: If values not in data columns.
        EnforceError: If index not in data columns or legal time columns.

    Returns:
        DataFrame: Pivoted data.
    '''
    time_cols = [
        'date', 'year', 'quarter', 'month', 'two_week', 'week', 'day', 'hour',
        'half_hour', 'quarter_hour', 'minute', 'second', 'microsecond',
    ]

    Enforce(data, 'instance of', DataFrame)
    Enforce(
        len(data), '>=', 1,
        message='DataFrame must be at least 1 in length. Given length: {a}.',
    )
    for names in (columns, values):
        eft.enforce_columns_in_dataframe(names, data)

    if index is not None:
        legal = data.columns.tolist() + time_cols
        Enforce(
            index, 'in', legal,
            message='{a} is not in legal column names: {b}.',
        )
    # --------------------------------------------------------------------------

    # pivot on a copy of values so the given list is never modified
    vals = copy(values)
    if index is not None and index not in values:
        vals.append(index)

    if index in time_cols:
        def jitter(moment):
            # random sub-second offset
            return moment + dt.timedelta(microseconds=randint(0, 999999))

        data[index] = data[index].apply(jitter)

    pivoted = data.pivot(columns=columns, values=vals, index=index)[values]

    # drop the outermost column level
    pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted
def test_difference(self):
    '''
    Enforce.difference returns 1 for both (1, 2) and (3, 2).
    '''
    enforcer = Enforce(1, '==', 1)
    for first, second in [(1, 2), (3, 2)]:
        self.assertEqual(enforcer.difference(first, second), 1)
def test_eq(self):
    '''
    Enforce.eq is True for equal values and False for unequal values.
    '''
    enforcer = Enforce(1, '==', 1)

    same = enforcer.eq(1, 1)
    self.assertTrue(same)

    different = enforcer.eq(1, 2)
    self.assertFalse(different)