def test_table_info_attributes(table_types):
    """
    Verify the column-attribute summary produced by ``Table.info()``.

    A plain three-column table is summarised first.  Quantity, Time and
    SkyCoord data are then added and every attribute field is checked, and
    finally ``repr(t.info)`` is compared with the text written by ``t.info()``.
    """
    columns = [
        np.array([1, 2, 3], dtype='int32'),
        np.array([1, 2, 3], dtype='float32'),
        np.array(['a', 'c', 'e'], dtype='|S1'),
    ]
    tbl = table_types.Table(columns, names=['a', 'b', 'c'])

    # Minimal output for a typical table
    summary = tbl.info(out=None)
    subclass_fields = (['class'] if table_types.Table.__name__ == 'MyTable'
                       else [])
    expected_colnames = ['name', 'dtype', 'shape', 'unit', 'format',
                         'description', 'class', 'n_bad', 'length']
    assert summary.colnames == expected_colnames
    assert np.all(summary['name'] == ['a', 'b', 'c'])
    assert np.all(
        summary['dtype'] == ['int32', 'float32', dtype_info_name('S1')])
    if subclass_fields:
        assert np.all(summary['class'] == ['MyColumn'] * 3)

    # All output fields including a mixin column
    tbl['d'] = [1, 2, 3] * u.m
    tbl['d'].description = 'quantity'
    tbl['a'].format = '%02d'
    tbl['e'] = time.Time([1, 2, 3], format='mjd')
    tbl['e'].info.description = 'time'
    tbl['f'] = coordinates.SkyCoord([1, 2, 3], [1, 2, 3], unit='deg')
    tbl['f'].info.description = 'skycoord'

    summary = tbl.info(out=None)
    col_cls = tbl.ColumnClass.__name__
    # Expected content of each summary field, keyed by field name.
    expected_fields = {
        'name': 'a b c d e f'.split(),
        'dtype': ['int32', 'float32', dtype_info_name('S1'),
                  'float64', 'object', 'object'],
        'unit': ['', '', '', 'm', '', 'deg,deg'],
        'format': ['%02d', '', '', '', '', ''],
        'description': ['', '', '', 'quantity', 'time', 'skycoord'],
        'class': [col_cls] * 4 + ['Time', 'SkyCoord'],
    }
    for field, values in expected_fields.items():
        assert np.all(summary[field] == values)

    # Test that repr(t.info) is same as t.info()
    buffer = StringIO()
    tbl.info(out=buffer)
    assert repr(tbl.info) == buffer.getvalue()
def test_table_info_stats(table_types):
    """
    Test the info() method of printing a summary of table column statistics

    Three ways of calling ``info`` are exercised: the ``'stats'`` option
    written to a stream, ``['attributes', 'stats']`` returned as a table, and
    ``['attributes', custom]`` where ``custom`` comes from
    ``data_info_factory``.
    """
    a = np.array([1, 2, 1, 2], dtype='int32')
    b = np.array([1, 2, 1, 2], dtype='float32')
    c = np.array(['a', 'c', 'e', 'f'], dtype='|S1')
    d = time.Time([1, 2, 1, 2], format='mjd')
    t = table_types.Table([a, b, c, d], names=['a', 'b', 'c', 'd'])

    # The header line mentions masking only for masked tables.
    masked = 'masked=True ' if t.masked else ''
    table_header_line = f'<{t.__class__.__name__} {masked}length=4>'

    # Expected 'stats' listing; it is checked twice below, so define it once.
    # NOTE(review): these lines are compared verbatim, including padding --
    # confirm the spacing against actual info() output.
    exp = [
        table_header_line,
        'name mean std min max',
        '---- ---- --- --- ---',
        ' a 1.5 0.5 1 2',
        ' b 1.5 0.5 1.0 2.0',
        ' c -- -- -- --',
        ' d -- -- 1.0 2.0',
    ]

    # option = 'stats'
    out = StringIO()
    t.info('stats', out=out)
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', 'stats']
    tinfo = t.info(['attributes', 'stats'], out=None)
    assert tinfo.colnames == [
        'name', 'dtype', 'shape', 'unit', 'format', 'description', 'class',
        'mean', 'std', 'min', 'max', 'n_bad', 'length'
    ]
    assert np.all(tinfo['mean'] == ['1.5', '1.5', '--', '--'])
    assert np.all(tinfo['std'] == ['0.5', '0.5', '--', '--'])
    assert np.all(tinfo['min'] == ['1', '1.0', '--', '1.0'])
    assert np.all(tinfo['max'] == ['2', '2.0', '--', '2.0'])

    # Repeat the plain 'stats' check after the combined call above.
    out = StringIO()
    t.info('stats', out=out)
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', custom]
    custom = data_info_factory(names=['sum', 'first'],
                               funcs=[np.sum, lambda col: col[0]])
    # out=None returns the summary, so no output stream is needed here.
    tinfo = t.info(['attributes', custom], out=None)
    assert tinfo.colnames == [
        'name', 'dtype', 'shape', 'unit', 'format', 'description', 'class',
        'sum', 'first', 'n_bad', 'length'
    ]
    assert np.all(tinfo['name'] == ['a', 'b', 'c', 'd'])
    assert np.all(tinfo['dtype'] == ['int32', 'float32',
                                     dtype_info_name('S1'), 'object'])
    assert np.all(tinfo['sum'] == ['6', '6.0', '--', '--'])
    assert np.all(tinfo['first'] == ['1', '1.0', 'a', '1.0'])
def test_table_info_stats(table_types):
    """
    Test the info() method of printing a summary of table column statistics

    Covers the ``'stats'`` option written to a stream, the combined
    ``['attributes', 'stats']`` option returned as a table, and a custom
    info specification built with ``data_info_factory``.
    """
    a = np.array([1, 2, 1, 2], dtype='int32')
    b = np.array([1, 2, 1, 2], dtype='float32')
    c = np.array(['a', 'c', 'e', 'f'], dtype='|S1')
    d = time.Time([1, 2, 1, 2], format='mjd')
    t = table_types.Table([a, b, c, d], names=['a', 'b', 'c', 'd'])

    # The header line mentions masking only for masked tables.
    masked = 'masked=True ' if t.masked else ''
    table_header_line = f'<{t.__class__.__name__} {masked}length=4>'

    # Expected 'stats' listing, shared by both stream checks below.
    # NOTE(review): these lines are compared verbatim, including padding --
    # confirm the spacing against actual info() output.
    exp = [table_header_line,
           'name mean std min max',
           '---- ---- --- --- ---',
           ' a 1.5 0.5 1 2',
           ' b 1.5 0.5 1.0 2.0',
           ' c -- -- -- --',
           ' d -- -- 1.0 2.0']

    # option = 'stats'
    out = StringIO()
    t.info('stats', out=out)
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', 'stats']
    tinfo = t.info(['attributes', 'stats'], out=None)
    assert tinfo.colnames == ['name', 'dtype', 'shape', 'unit', 'format',
                              'description', 'class', 'mean', 'std', 'min',
                              'max', 'n_bad', 'length']
    assert np.all(tinfo['mean'] == ['1.5', '1.5', '--', '--'])
    assert np.all(tinfo['std'] == ['0.5', '0.5', '--', '--'])
    assert np.all(tinfo['min'] == ['1', '1.0', '--', '1.0'])
    assert np.all(tinfo['max'] == ['2', '2.0', '--', '2.0'])

    # Repeat the plain 'stats' check after the combined call above.
    out = StringIO()
    t.info('stats', out=out)
    assert out.getvalue().splitlines() == exp

    # option = ['attributes', custom]
    custom = data_info_factory(names=['sum', 'first'],
                               funcs=[np.sum, lambda col: col[0]])
    # out=None returns the summary, so no output stream is needed here.
    tinfo = t.info(['attributes', custom], out=None)
    assert tinfo.colnames == ['name', 'dtype', 'shape', 'unit', 'format',
                              'description', 'class', 'sum', 'first',
                              'n_bad', 'length']
    assert np.all(tinfo['name'] == ['a', 'b', 'c', 'd'])
    assert np.all(tinfo['dtype'] == ['int32', 'float32',
                                     dtype_info_name('S1'), 'object'])
    assert np.all(tinfo['sum'] == ['6', '6.0', '--', '--'])
    assert np.all(tinfo['first'] == ['1', '1.0', 'a', '1.0'])
def test_table_info_attributes(table_types):
    """
    Exercise ``Table.info()`` when it reports column attributes.

    The summary is requested with ``out=None`` so it comes back as a table
    whose columns can be compared directly, first for a basic table and then
    after Quantity, Time and SkyCoord data have been added.
    """
    ints = np.array([1, 2, 3], dtype='int32')
    floats = np.array([1, 2, 3], dtype='float32')
    chars = np.array(['a', 'c', 'e'], dtype='|S1')
    table = table_types.Table([ints, floats, chars], names=['a', 'b', 'c'])

    # Minimal output for a typical table
    info_tbl = table.info(out=None)
    custom_cls = ['class'] if table_types.Table.__name__ == 'MyTable' else []
    want_colnames = ['name', 'dtype', 'shape', 'unit', 'format',
                     'description', 'class', 'n_bad', 'length']
    assert info_tbl.colnames == want_colnames

    names_match = info_tbl['name'] == ['a', 'b', 'c']
    assert np.all(names_match)
    dtypes_match = info_tbl['dtype'] == ['int32', 'float32',
                                         dtype_info_name('S1')]
    assert np.all(dtypes_match)
    if custom_cls:
        assert np.all(info_tbl['class'] == ['MyColumn'] * 3)

    # All output fields including a mixin column
    table['d'] = [1, 2, 3] * u.m
    table['d'].description = 'quantity'
    table['a'].format = '%02d'
    table['e'] = time.Time([1, 2, 3], format='mjd')
    table['e'].info.description = 'time'
    table['f'] = coordinates.SkyCoord([1, 2, 3], [1, 2, 3], unit='deg')
    table['f'].info.description = 'skycoord'

    info_tbl = table.info(out=None)
    assert np.all(info_tbl['name'] == 'a b c d e f'.split())

    want_dtypes = ['int32', 'float32', dtype_info_name('S1'),
                   'float64', 'object', 'object']
    assert np.all(info_tbl['dtype'] == want_dtypes)

    want_units = ['', '', '', 'm', '', 'deg,deg']
    assert np.all(info_tbl['unit'] == want_units)

    want_formats = ['%02d', '', '', '', '', '']
    assert np.all(info_tbl['format'] == want_formats)

    want_descriptions = ['', '', '', 'quantity', 'time', 'skycoord']
    assert np.all(info_tbl['description'] == want_descriptions)

    column_cls = table.ColumnClass.__name__
    want_classes = [column_cls, column_cls, column_cls, column_cls,
                    'Time', 'SkyCoord']
    assert np.all(info_tbl['class'] == want_classes)

    # Test that repr(t.info) is same as t.info()
    stream = StringIO()
    table.info(out=stream)
    assert repr(table.info) == stream.getvalue()
def test_dtype_info_name(input, output):
    """
    Check that ``dtype_info_name(input)`` returns ``output``.

    Here the available types::

      'b'       boolean
      'i'       (signed) integer
      'u'       unsigned integer
      'f'       floating-point
      'c'       complex-floating point
      'O'       (Python) objects
      'S', 'a'  (byte-)string
      'U'       Unicode
      'V'       raw data (void)
    """
    actual = dtype_info_name(input)
    assert actual == output
def _pformat_col_iter(self, col, max_lines, show_name, show_unit, outs,
                      show_dtype=False, show_length=None):
    """Iterator which yields formatted string representation of column values.

    Header lines (name, unit, dtype and a ``'---'`` separator, as requested)
    are yielded first, followed by one string per displayed row.  When the
    column has more rows than fit, a single ``'...'`` line stands in for the
    omitted rows.

    Parameters
    ----------
    col : column-like
        Object to format.  It must support ``len()`` and indexing and expose
        an ``info`` attribute; ``dtype`` and ``shape`` are used if present.

    max_lines : int
        Maximum lines of output (header + data rows)

    show_name : bool
        Include column name as a header line.

    show_unit : bool
        Include a header line for the unit (empty string if the column has
        no unit).

    outs : dict
        Must be a dict which is used to pass back additional values
        defined within the iterator.  The keys ``'show_length'``,
        ``'n_header'``, ``'i_centers'`` and ``'i_dashes'`` are set once the
        iterator has been exhausted.

    show_dtype : bool
        Include column dtype. Default is False.

    show_length : bool
        Include column length at end.  If None, it is set to True when the
        column has more rows than fit in ``max_lines``; the resolved value
        is passed back through ``outs``.

    Yields
    ------
    str
        One output line at a time.

    Raises
    ------
    ValueError
        If formatting a value raises ``ValueError``; the original error is
        attached as the cause.
    """
    max_lines, _ = self._get_pprint_size(max_lines, -1)
    dtype = getattr(col, 'dtype', None)
    multidims = getattr(col, 'shape', [0])[1:]
    if multidims:
        # Index suffixes selecting the first and last element of each row
        multidim0 = (0,) * len(multidims)
        multidim1 = tuple(n - 1 for n in multidims)
        trivial_multidims = np.prod(multidims) == 1

    i_dashes = None
    i_centers = []  # Line indexes where content should be centered
    n_header = 0
    if show_name:
        i_centers.append(n_header)
        # Get column name (or 'None' if not set)
        col_name = str(col.info.name)
        n_header += 1
        yield self._name_and_structure(col_name, dtype)
    if show_unit:
        i_centers.append(n_header)
        n_header += 1
        yield str(col.info.unit or '')
    if show_dtype:
        i_centers.append(n_header)
        n_header += 1
        if dtype is not None:
            col_dtype = dtype_info_name((dtype, multidims))
        else:
            col_dtype = col.__class__.__qualname__ or 'object'
        yield col_dtype
    if show_unit or show_name or show_dtype:
        i_dashes = n_header
        n_header += 1
        yield '---'

    # Remaining budget is for data rows only
    max_lines -= n_header
    n_print2 = max_lines // 2
    n_rows = len(col)

    # This block of code is responsible for producing the function that
    # will format values for this column.  The ``format_func`` function
    # takes two args (col_format, val) and returns the string-formatted
    # version.  Some points to understand:
    #
    # - col_format could itself be the formatting function, so it will
    #   actually end up being called with itself as the first arg.  In
    #   this case the function is expected to ignore its first arg.
    #
    # - auto_format_func is a function that gets called on the first
    #   column value that is being formatted.  It then determines an
    #   appropriate formatting function given the actual value to be
    #   formatted.  This might be deterministic or it might involve
    #   try/except.  The latter allows for different string formatting
    #   options like %f or {:5.3f}.  When auto_format_func is called it:
    #
    #   1. Caches the function in the _format_funcs dict so for subsequent
    #      values the right function is called right away.
    #   2. Returns the formatted value.
    #
    # - possible_string_format_functions is a function that yields a
    #   succession of functions that might successfully format the
    #   value.  There is a default, but Mixin methods can override this.
    #   See Quantity for an example.
    #
    # - get_auto_format_func() returns a wrapped version of auto_format_func
    #   with the column id and possible_string_format_functions as
    #   enclosed variables.
    col_format = col.info.format or getattr(col.info, 'default_format', None)
    pssf = (getattr(col.info, 'possible_string_format_functions', None)
            or _possible_string_format_functions)
    auto_format_func = get_auto_format_func(col, pssf)
    format_func = col.info._format_funcs.get(col_format, auto_format_func)

    if n_rows > max_lines:
        if show_length is None:
            show_length = True
        # i0 is the position that is rendered as '...'; rows up to i0 and
        # rows after i1 are the ones that get visited.
        i0 = n_print2 - (1 if show_length else 0)
        i1 = n_rows - n_print2 - max_lines % 2
        indices = np.concatenate([np.arange(0, i0 + 1),
                                  np.arange(i1 + 1, n_rows)])
    else:
        # -1 never matches a row index, so no '...' line is produced
        i0 = -1
        indices = np.arange(n_rows)

    def format_col_str(idx):
        if multidims:
            # Prevents columns like Column(data=[[(1,)],[(2,)]], name='a')
            # with shape (n,1,...,1) from being printed as if there was
            # more than one element in a row
            if trivial_multidims:
                return format_func(col_format, col[(idx, ) + multidim0])
            else:
                left = format_func(col_format, col[(idx, ) + multidim0])
                right = format_func(col_format, col[(idx, ) + multidim1])
                return f'{left} .. {right}'
        else:
            return format_func(col_format, col[idx])

    # Add formatted values if within bounds allowed by max_lines
    for idx in indices:
        if idx == i0:
            yield '...'
            continue
        # Only the formatting call is guarded; the yield stays outside the
        # try so errors thrown into the generator are not re-labelled.
        try:
            formatted = format_col_str(idx)
        except ValueError as err:
            raise ValueError(
                f'Unable to parse format string "{col_format}" for entry '
                f'"{col[idx]}" in column "{col.info.name}"') from err
        yield formatted

    outs['show_length'] = show_length
    outs['n_header'] = n_header
    outs['i_centers'] = i_centers
    outs['i_dashes'] = i_dashes