def test_evaluate_expression_syntax_error(self):
     """A malformed expression cell makes evaluation raise a SyntaxError that names its location."""
     names = ['a', 'b', 'c']
     units = ['-', 'text', 'm']
     broken_expression = '{{((((*+-/ x y heres_a_syntax_error!!!!!!!!!!!!!!!!}}'
     frame = pd.DataFrame(data=[[nan, 'gnu', 3], [4, 'gnat', broken_expression]],
                          columns=names)
     specs = {}
     for name, unit in zip(names, units):
         specs[name] = ColumnMetadata(Unit(unit))
     table = Table(df=frame, name='some_table', col_specs=specs, destinations=['success', 'glory'])

     # The message is expected to identify the table and the offending cell.
     expected_message = r"Syntax error in expression in table 'some_table', column 2, row 1"
     with raises(SyntaxError, match=expected_message):
         table.evaluate_expressions({'x': 7, 'y': 9}, inplace=False)
 def test_convert_to_ref_units(self):
     """Converting to reference units turns the mm and km columns into m and rescales their values."""
     names = ['a', 'b', 'c']
     units = ['m', 'mm', 'km']
     frame = pd.DataFrame([[11, 12, 13], [21, 22, 23]], columns=names)
     specs = {name: ColumnMetadata(Unit(unit)) for name, unit in zip(names, units)}
     table = Table(frame, name='Fool', col_specs=specs)

     conversions = [
         ScaleUnitConversion(Unit('mm'), Unit('m'), 0.001),
         ScaleUnitConversion(Unit('km'), Unit('m'), 1000),
     ]
     policy = CustomUnitPolicy(conversions)
     converted = table.convert_to_ref_units(policy, inplace=False)

     assert converted.col_units == [Unit('m')] * 3
     actual = np.array(converted.df)
     expected = np.array([[11, 0.012, 13000],
                          [21, 0.022, 23000]])
     assert (actual == expected).all()
 def test_convert_to_home_units(self):
     """Converting to home units gives each column its home unit and rescales the values."""
     names = ['a', 'b', 'c']
     current_units = ['m', 'm', 'm']
     home_units = ['m', 'mm', 'km']
     frame = pd.DataFrame([[11, 12, 13], [21, 22, 23]], columns=names)
     specs = {}
     for name, unit, home_unit in zip(names, current_units, home_units):
         specs[name] = ColumnMetadata(Unit(unit), Unit(home_unit))
     table = Table(frame, name='Fool', col_specs=specs)

     conversions = [
         ScaleUnitConversion(Unit('mm'), Unit('m'), 0.001),
         ScaleUnitConversion(Unit('km'), Unit('m'), 1000),
     ]
     policy = CustomUnitPolicy(conversions)
     at_home = table.convert_to_home_units(policy)

     assert at_home.col_units == [Unit('m'), Unit('mm'), Unit('km')]
     actual = np.array(at_home.df)
     expected = np.array([[11, 12000, 0.013],
                          [21, 22000, 0.023]])
     assert (actual == expected).all()
    def test_convert_to_ref_units_unknown_unit(self):
        """A unit missing from the policy is left as-is with 'ignore' and rejected with 'raise'."""
        names = ['a', 'b', 'c']
        units = ['m', 'mm', 'km']
        frame = pd.DataFrame([[11, 12, 13], [21, 22, 23]], columns=names)
        specs = {}
        for name, unit in zip(names, units):
            specs[name] = ColumnMetadata(Unit(unit))
        table = Table(frame, name='Fool', col_specs=specs)

        # This policy knows how to convert mm but has no rule for km.
        mm_only_policy = CustomUnitPolicy([ScaleUnitConversion(Unit('mm'), Unit('m'), 0.001)])

        # 'ignore': the km column keeps both its unit and its values.
        converted = table.convert_to_ref_units(
            mm_only_policy, inplace=False, units_not_in_policy='ignore')
        assert converted.col_units == [Unit('m'), Unit('m'), Unit('km')]
        actual = np.array(converted.df)
        expected = np.array([[11, 0.012, 13],
                             [21, 0.022, 23]])
        assert (actual == expected).all()

        # 'raise': the same unknown unit is reported as an error.
        with raises(ValueError):
            table.convert_to_ref_units(mm_only_policy, inplace=False, units_not_in_policy='raise')
 def test_to_csv_nonstring_colnames_and_destinations(self):
     """Numeric column names and destinations are written out by ``to_csv`` as text."""
     # PS-53 Bundle.to_csv() fails when column names are not strings
     colnames = [1.234, 666.0, 42.0]
     dests = [1984, 2001.2001]
     rows = [[nan, 'gnu', 3], [4, 'gnat', '{{(+ x y)}}']]
     frame = pd.DataFrame(data=rows, columns=colnames)
     specs = {}
     for colname, unit in zip(colnames, ['-', 'text', 'm']):
         specs[colname] = ColumnMetadata(Unit(unit))
     table = Table(df=frame, name='some_table', col_specs=specs, destinations=dests)

     buffer = io.StringIO()
     table.to_csv(buffer)

     expected = dedent("""\
         **some_table;;
         1984 2001.2001
         1.234;666.0;42.0
         -;text;m
         -;gnu;3
         4.0;gnat;{{(+ x y)}}
         
         """)
     assert buffer.getvalue() == expected
 def col_specs_with_format(self):
     """Return column specs for 'a', 'b' and 'c' where 'a' and 'c' carry a format string."""
     spec_a = ColumnMetadata(Unit('-'), format_str='${:,.2f}')
     spec_b = ColumnMetadata(Unit('text'))
     spec_c = ColumnMetadata(Unit('m'), format_str='.4e')
     return dict(a=spec_a, b=spec_b, c=spec_c)
 def col_specs(self):
     """Return column specs for 'a', 'b' and 'c' with units '-', 'text' and 'm' respectively."""
     specs = {}
     for name, unit in (('a', '-'), ('b', 'text'), ('c', 'm')):
         specs[name] = ColumnMetadata(Unit(unit))
     return specs
Exemple #8
0
def import_from_word(path: Union[str, Path]) -> Bundle:
    """
    Imports table blocks from tables in an MS Word docx file.
    Only reads those Word tables whose top left cell is a table block start marker ('**table_name').
    Word tables with fewer than five rows are skipped, since a table block needs a name row,
    a destinations row, a column names row, a column units row, and at least one row of data.

    MS Word documents cannot strictly live up to the StarTable format specification and therefore
    this function is not to be considered a formal reader. Rather, it is a utility that
    merely attempts to import table blocks.

    :param path: The path to the docx file
    :return: Bundle containing the imported tables
    :raises ImportError: If the optional python-docx package is not installed
    :raises IOError: If no file exists at ``path``, or the opened object is not a docx Document
    :raises WordTableBlockParsingError: If building a table from a candidate table block
        fails with an AssertionError
    """
    try:
        import docx
    except ImportError:
        raise ImportError(
            "Missing optional dependency 'docx'. Install python-docx package "
            "for MS Word support. Use pip or conda to install python-docx."
        ) from None

    path = str(path)

    if not os.path.exists(path):
        raise IOError(f'File not found: {path}')

    word_doc = docx.Document(path)

    if not isinstance(word_doc, docx.document.Document):
        raise IOError(f'Not a docx Document: {word_doc}')

    tables = []
    for wt in word_doc.tables:

        # Does it even look like a StarTable table block?
        # The row count is checked before any cell is read, so that a table without rows
        # is skipped instead of failing on the cell(0, 0) lookup below.
        if len(wt.rows) < 5:
            # Not enough rows for name, destinations, col names, col units, and at least one row of data.
            # Can't be a StarTable table block. Skip it.
            continue
        marker_text = wt.cell(0, 0).text.strip()
        if not re.match(TABLE_BLOCK_MARKER_PATTERN, marker_text):
            continue

        # Parse table
        table_name = marker_text[2:]  # drop the leading '**' of the start marker
        destinations = set(wt.cell(1, 0).text.strip().split(' '))
        col_names = [cell.text.strip() for cell in wt.row_cells(2)]
        col_units = [cell.text.strip() for cell in wt.row_cells(3)]
        values = [[cell.text.strip() for cell in row.cells]
                  for row in wt.rows[4:]]

        try:
            df = pd.DataFrame(columns=col_names, data=values)
            col_specs = {
                n: ColumnMetadata(unit=u)
                for n, u in zip(col_names, col_units)
            }
            tables.append(
                Table(df,
                      name=table_name,
                      col_specs=col_specs,
                      destinations=destinations,
                      origin=TableOrigin(path)))
        except AssertionError as e:
            # Malformed table
            raise WordTableBlockParsingError(
                f"Unable to parse table block '{table_name}' in document {path}"
            ) from e

    return Bundle(tables=tables, origin=TableOrigin(path))