コード例 #1
0
ファイル: test_metadata.py プロジェクト: hyperdimensions/SDV
    def test_load_tables(self):
        """Test get tables"""
        # Setup
        table_names = ['foo', 'bar', 'tar']
        table_data = [
            pd.DataFrame({'foo': [1, 2]}),
            pd.DataFrame({'bar': [3, 4]}),
            pd.DataFrame({'tar': [5, 6]})
        ]
        metadata = Mock(spec=Metadata)
        metadata.get_tables.side_effect = table_names
        metadata.load_table.side_effect = table_data

        # Run
        tables = ['table 1', 'table 2', 'table 3']
        result = Metadata.load_tables(metadata, tables=tables)

        # Asserts
        expected = {
            'table 1': pd.DataFrame({'foo': [1, 2]}),
            'table 2': pd.DataFrame({'bar': [3, 4]}),
            'table 3': pd.DataFrame({'tar': [5, 6]})
        }
        assert result.keys() == expected.keys()

        for k, v in result.items():
            pd.testing.assert_frame_equal(v, expected[k])
コード例 #2
0
ファイル: demo.py プロジェクト: time-series-tools/SDV
def _load_demo_dataset(dataset_name, data_path):
    dataset_path = _get_dataset_path(dataset_name, data_path)
    meta = Metadata(metadata=os.path.join(dataset_path, 'metadata.json'))
    tables = {
        name: _dtypes64(table)
        for name, table in meta.load_tables().items()
    }
    return meta, tables
コード例 #3
0
def _validate_arguments(synth, real, metadata, root_path, table_name):
    """Validate arguments needed to compute descriptors values.

    If ``metadata`` is an instance of dict create the ``Metadata`` object.
    If ``metadata`` is ``None``, ``real`` has to be a ``pandas.DataFrane``.

    If ``real`` is ``None`` load all the tables and assert that ``synth`` is a ``dict``.
    Otherwise, ``real`` and ``synth`` must be of the same type.

    If ``synth`` is not a ``dict``, create a dictionary using the ``table_name``.

    Assert that ``synth`` and ``real`` must have the same tables.

    Args:
        synth (dict or pandas.DataFrame):
            Synthesized data.
        real (dict, pandas.DataFrame or None):
            Real data.
        metadata (str, dict, Metadata or None):
            Metadata instance or details needed to build it.
        root_path (str):
            Path to the metadata file.
        table_name (str):
            Table name used to prepare the metadata object, real and synth dict.

    Returns:
        tuple (dict, dict, Metadata):
            Processed tables and Metadata oject.
    """
    if isinstance(metadata, dict):
        metadata = Metadata(metadata, root_path)
    elif metadata is None:
        if not isinstance(real, pd.DataFrame):
            raise TypeError(
                'If metadata is None, `real` has to be a DataFrame')

        metadata = Metadata()
        metadata.add_table(table_name, data=real)

    if real is None:
        real = metadata.load_tables()
        if not isinstance(synth, dict):
            raise TypeError('If `real` is `None`, `synth` must be a dict')

    elif not isinstance(synth, type(real)):
        raise TypeError('`real` and `synth` must be of the same type')

    if not isinstance(synth, dict):
        synth = {table_name: synth}

    if not isinstance(real, dict):
        real = {table_name: real}

    if not set(real.keys()) == set(synth.keys()):
        raise ValueError(
            'real and synthetic dataset must have the same tables')

    return synth, real, metadata
コード例 #4
0
ファイル: demo.py プロジェクト: TrendingTechnology/SDV
def _load_demo_dataset(dataset_name, data_path):
    dataset_path = _get_dataset_path(dataset_name, data_path)
    meta = Metadata(metadata=os.path.join(dataset_path, 'metadata.json'))
    tables = meta.load_tables()
    return meta, tables