def test_get_tables(self):
    """``get_tables`` should pair every requested table name with its loaded frame."""
    # NOTE(review): another ``test_get_tables`` is defined right after this one;
    # if both live in the same class the later definition shadows this test
    # and it never runs -- confirm and rename if so.

    # Setup
    loaded_frames = [
        pd.DataFrame({column: values})
        for column, values in (('foo', [1, 2]), ('bar', [3, 4]), ('tar', [5, 6]))
    ]
    metadata = Mock(spec=Metadata)
    # A list ``side_effect`` makes each successive call return the next item.
    metadata.get_table_names.side_effect = ['foo', 'bar', 'tar']
    metadata.load_table.side_effect = loaded_frames
    requested = ['table 1', 'table 2', 'table 3']

    # Run
    result = Metadata.get_tables(metadata, tables=requested)

    # Asserts
    # Expected frames are built afresh so the comparison does not rely on
    # the very same objects that were handed to the mock.
    expected = {
        name: pd.DataFrame({column: values})
        for name, column, values in (
            ('table 1', 'foo', [1, 2]),
            ('table 2', 'bar', [3, 4]),
            ('table 3', 'tar', [5, 6]),
        )
    }
    assert result.keys() == expected.keys()
    for name, frame in result.items():
        pd.testing.assert_frame_equal(frame, expected[name])
def test_get_tables(self):
    """``get_tables`` should return the table names stored under ``_metadata['tables']``."""
    # Setup
    expected_names = ['table 1', 'table 2', 'table 3']
    metadata = Mock(spec=Metadata)
    # Only the table names matter for this test, so every entry maps to ``None``.
    metadata._metadata = {'tables': dict.fromkeys(expected_names)}

    # Run
    result = Metadata.get_tables(metadata)

    # Asserts
    # Sorting makes the check independent of the order the names come back in.
    assert sorted(result) == expected_names
def _validate_arguments(synth, real, metadata, root_path, table_name):
    """Validate and normalize the arguments needed to compute descriptor values.

    The steps are applied in this order:

    1. If ``metadata`` is a ``str`` or a ``dict``, a ``Metadata`` object is
       built from it together with ``root_path``. If ``metadata`` is ``None``,
       ``real`` has to be a ``pandas.DataFrame``; an empty ``Metadata`` is
       created and ``real`` is added to it under ``table_name``.
    2. If ``real`` is ``None``, all the tables are loaded from the metadata and
       ``synth`` must be a ``dict``. Otherwise ``synth`` must be an instance of
       the type of ``real``.
    3. If ``synth`` or ``real`` is not a ``dict``, it is wrapped in a
       single-entry dictionary keyed by ``table_name``.
    4. ``synth`` and ``real`` must contain exactly the same tables.
    5. If ``real`` holds fewer tables than the metadata knows about, a new
       ``Metadata`` restricted to the tables in ``real`` is created.

    Args:
        synth (dict or pandas.DataFrame):
            Synthesized data.
        real (dict, pandas.DataFrame or None):
            Real data.
        metadata (str, dict, Metadata or None):
            Metadata instance or details needed to build it.
        root_path (str):
            Path to the metadata file.
        table_name (str):
            Table name used to prepare the metadata object, real and synth dict.

    Returns:
        tuple (dict, dict, Metadata):
            Processed ``synth`` tables, processed ``real`` tables and the
            Metadata object, in that order.

    Raises:
        TypeError:
            If ``metadata`` is ``None`` and ``real`` is not a DataFrame, if
            ``real`` is ``None`` and ``synth`` is not a dict, or if ``synth``
            is not an instance of the type of ``real``.
        ValueError:
            If ``real`` and ``synth`` do not have the same table names.
    """
    # ``str`` is accepted alongside ``dict`` because the documented contract
    # lists it; before, a string slipped through untouched and failed later
    # with an ``AttributeError`` when used as a ``Metadata`` object.
    # NOTE(review): assumes ``Metadata`` accepts a string (presumably a path
    # to the metadata file) as its first argument -- confirm.
    if isinstance(metadata, (str, dict)):
        metadata = Metadata(metadata, root_path)
    elif metadata is None:
        if not isinstance(real, pd.DataFrame):
            raise TypeError('If metadata is None, `real` has to be a DataFrame')

        metadata = Metadata()
        metadata.add_table(table_name, data=real)

    if real is None:
        real = metadata.load_tables()
        if not isinstance(synth, dict):
            raise TypeError('If `real` is `None`, `synth` must be a dict')

    elif not isinstance(synth, type(real)):
        raise TypeError('`real` and `synth` must be of the same type')

    # From here on both datasets are handled as ``{table_name: data}`` dicts.
    if not isinstance(synth, dict):
        synth = {table_name: synth}

    if not isinstance(real, dict):
        real = {table_name: real}

    if set(real) != set(synth):
        raise ValueError('real and synthetic dataset must have the same tables')

    # Only a subset of the known tables was given: narrow the metadata down
    # to the tables that are actually present in ``real``.
    if len(real) < len(metadata.get_tables()):
        meta_dict = {table: metadata.get_table_meta(table) for table in real}
        metadata = Metadata({'tables': meta_dict})

    return synth, real, metadata