def test_evaluate_expression_syntax_error(self):
    """A malformed expression cell makes evaluate_expressions raise SyntaxError.

    The error message is expected to name the table, the column index and
    the row index of the offending cell.
    """
    broken_expression = '{{((((*+-/ x y heres_a_syntax_error!!!!!!!!!!!!!!!!}}'
    names = ['a', 'b', 'c']
    units = ['-', 'text', 'm']

    frame = pd.DataFrame(
        data=[[nan, 'gnu', 3], [4, 'gnat', broken_expression]],
        columns=names,
    )
    specs = {}
    for name, unit in zip(names, units):
        specs[name] = ColumnMetadata(Unit(unit))
    table = Table(df=frame, name='some_table', col_specs=specs, destinations=['success', 'glory'])

    expected_message = r"Syntax error in expression in table 'some_table', column 2, row 1"
    with raises(SyntaxError, match=expected_message):
        table.evaluate_expressions({'x': 7, 'y': 9}, inplace=False)
def test_convert_to_ref_units(self):
    """Columns given in m, mm and km are all converted to the reference unit m."""
    names = ['a', 'b', 'c']
    frame = pd.DataFrame([[11, 12, 13], [21, 22, 23]], columns=names)
    specs = {}
    for name, unit in zip(names, ['m', 'mm', 'km']):
        specs[name] = ColumnMetadata(Unit(unit))
    table = Table(frame, name='Fool', col_specs=specs)

    # Policy knows how to scale both non-reference units to metres.
    mm_to_m = ScaleUnitConversion(Unit('mm'), Unit('m'), 0.001)
    km_to_m = ScaleUnitConversion(Unit('km'), Unit('m'), 1000)
    policy = CustomUnitPolicy([mm_to_m, km_to_m])

    converted = table.convert_to_ref_units(policy, inplace=False)

    assert converted.col_units == [Unit('m')] * 3
    expected_values = np.array([[11, 0.012, 13000], [21, 0.022, 23000]])
    actual_values = np.array(converted.df)
    assert (actual_values == expected_values).all()
def test_convert_to_home_units(self):
    """Columns stored in m are converted back to their home units m, mm and km."""
    names = ['a', 'b', 'c']
    current_units = ['m', 'm', 'm']
    home_units = ['m', 'mm', 'km']

    frame = pd.DataFrame([[11, 12, 13], [21, 22, 23]], columns=names)
    specs = {}
    for name, unit, home_unit in zip(names, current_units, home_units):
        specs[name] = ColumnMetadata(Unit(unit), Unit(home_unit))
    table = Table(frame, name='Fool', col_specs=specs)

    mm_to_m = ScaleUnitConversion(Unit('mm'), Unit('m'), 0.001)
    km_to_m = ScaleUnitConversion(Unit('km'), Unit('m'), 1000)
    policy = CustomUnitPolicy([mm_to_m, km_to_m])

    converted = table.convert_to_home_units(policy)

    assert converted.col_units == [Unit('m'), Unit('mm'), Unit('km')]
    expected_values = np.array([[11, 12000, 0.013], [21, 22000, 0.023]])
    actual_values = np.array(converted.df)
    assert (actual_values == expected_values).all()
def test_convert_to_ref_units_unknown_unit(self):
    """Units missing from the policy are left untouched with 'ignore' and rejected with 'raise'."""
    names = ['a', 'b', 'c']
    frame = pd.DataFrame([[11, 12, 13], [21, 22, 23]], columns=names)
    specs = {}
    for name, unit in zip(names, ['m', 'mm', 'km']):
        specs[name] = ColumnMetadata(Unit(unit))
    table = Table(frame, name='Fool', col_specs=specs)

    # The policy deliberately has no conversion for km.
    policy_without_km = CustomUnitPolicy([ScaleUnitConversion(Unit('mm'), Unit('m'), 0.001)])

    # 'ignore': the km column keeps both its unit and its values.
    converted = table.convert_to_ref_units(
        policy_without_km, inplace=False, units_not_in_policy='ignore')
    assert converted.col_units == [Unit('m'), Unit('m'), Unit('km')]
    expected_values = np.array([[11, 0.012, 13], [21, 0.022, 23]])
    actual_values = np.array(converted.df)
    assert (actual_values == expected_values).all()

    # 'raise': the unknown unit is an error.
    with raises(ValueError):
        table.convert_to_ref_units(
            policy_without_km, inplace=False, units_not_in_policy='raise')
def test_to_csv_nonstring_colnames_and_destinations(self):
    """to_csv must cope with column names and destinations that are not strings."""
    # PS-53 Bundle.to_csv() fails when column names are not strings
    numeric_names = [1.234, 666.0, 42.0]
    numeric_destinations = [1984, 2001.2001]

    frame = pd.DataFrame(
        data=[[nan, 'gnu', 3], [4, 'gnat', '{{(+ x y)}}']],
        columns=numeric_names,
    )
    specs = {}
    for name, unit in zip(numeric_names, ['-', 'text', 'm']):
        specs[name] = ColumnMetadata(Unit(unit))
    table = Table(df=frame, name='some_table', col_specs=specs,
                  destinations=numeric_destinations)

    buffer = io.StringIO()
    table.to_csv(buffer)
    written = buffer.getvalue()

    # NOTE(review): the line layout of this literal (including the trailing
    # blank line) should be confirmed against the writer's actual output.
    expected = dedent("""\
        **some_table;;
        1984 2001.2001
        1.234;666.0;42.0
        -;text;m
        -;gnu;3
        4.0;gnat;{{(+ x y)}}

        """)
    assert written == expected
def col_specs_with_format(self):
    """Return column specs for 'a', 'b' and 'c' where 'a' and 'c' carry a format string."""
    specs = {}
    specs['a'] = ColumnMetadata(Unit('-'), format_str='${:,.2f}')
    specs['b'] = ColumnMetadata(Unit('text'))
    specs['c'] = ColumnMetadata(Unit('m'), format_str='.4e')
    return specs
def col_specs(self):
    """Return plain column specs mapping 'a', 'b', 'c' to the units '-', 'text', 'm'."""
    units_by_name = (('a', '-'), ('b', 'text'), ('c', 'm'))
    specs = {}
    for name, unit in units_by_name:
        specs[name] = ColumnMetadata(Unit(unit))
    return specs
def import_from_word(path: Union[str, Path]) -> Bundle:
    """
    Imports table blocks from tables in an MS Word docx file.

    Only reads those Word tables whose top left cell is a table block start marker
    ('**table_name') and which have at least five rows (name, destinations, column
    names, column units, and one row of data). Other Word tables are skipped.

    MS Word documents cannot strictly live up to the StarTable format specification
    and therefore this function is not to be considered a formal reader. Rather, it
    is a utility that merely attempts to import table blocks.

    :param path: The path to the docx file
    :return: Bundle containing the imported tables
    :raises ImportError: if the optional python-docx package is not installed
    :raises FileNotFoundError: if ``path`` does not exist (subclass of IOError, so
        callers catching IOError are unaffected)
    :raises IOError: if the opened object is not a docx Document
    :raises WordTableBlockParsingError: if building a table from a candidate table
        block fails with an AssertionError
    """
    try:
        import docx
    except ImportError:
        raise ImportError(
            "Missing optional dependency 'docx'. Install python-docx package "
            "for MS Word support. Use pip or conda to install python-docx."
        ) from None

    path = str(path)
    if not os.path.exists(path):
        raise FileNotFoundError(f'File not found: {path}')

    word_doc = docx.Document(path)
    if not isinstance(word_doc, docx.document.Document):
        raise IOError(f'Not a docx Document: {word_doc}')

    tables = []
    for wt in word_doc.tables:
        top_left_text = wt.cell(0, 0).text.strip()

        # Does it even look like a StarTable table block?
        if not re.match(TABLE_BLOCK_MARKER_PATTERN, top_left_text):
            continue
        if len(wt.rows) < 5:
            # Not enough rows for name, destinations, col names, col units, and at
            # least one row of data. Can't be a StarTable table block. Skip it.
            continue

        # Parse table
        table_name = top_left_text[2:]  # drop the leading '**' marker

        # Split on any run of whitespace so that repeated spaces, tabs or paragraph
        # breaks inside the Word cell do not produce empty or fused destination
        # names. An empty cell keeps its historical result: one empty name.
        destinations = set(wt.cell(1, 0).text.split()) or {''}

        col_names = [cell.text.strip() for cell in wt.row_cells(2)]
        col_units = [cell.text.strip() for cell in wt.row_cells(3)]
        values = [[cell.text.strip() for cell in row.cells] for row in wt.rows[4:]]

        try:
            df = pd.DataFrame(columns=col_names, data=values)
            col_specs = {
                n: ColumnMetadata(unit=u) for n, u in zip(col_names, col_units)
            }
            tables.append(
                Table(df, name=table_name, col_specs=col_specs,
                      destinations=destinations, origin=TableOrigin(path)))
        except AssertionError as e:
            # Malformed table
            raise WordTableBlockParsingError(
                f"Unable to parse table block '{table_name}' in document {path}"
            ) from e

    return Bundle(tables=tables, origin=TableOrigin(path))