import numpy as np
import sdmetrics


def evaluate(synthetic_data, real_data=None, metadata=None, root_path=None,
             table_name=None, metrics=None, aggregate=True):
    """Apply multiple metrics at once.

    Args:
        synthetic_data (dict[str, pandas.DataFrame] or pandas.DataFrame):
            Map of names and tables of synthesized data. When evaluating a single
            table, a single ``pandas.DataFrame`` can be passed alone.
        real_data (dict[str, pandas.DataFrame] or pandas.DataFrame):
            Map of names and tables of real data. When evaluating a single table,
            a single ``pandas.DataFrame`` can be passed alone.
        metadata (str, dict, Metadata or None):
            Metadata instance or details needed to build it.
        root_path (str):
            Relative path to find the metadata.json file when needed.
        table_name (str):
            Table name to be evaluated, only used when ``synthetic_data`` is a
            ``pandas.DataFrame`` and ``real_data`` is ``None``.
        metrics (list[str]):
            List of metric names to apply.
        aggregate (bool):
            Whether to compute the mean of all the scores and return a single
            float value, or return a ``pandas.DataFrame`` containing the score
            that each metric obtained. Defaults to ``True``.

    Returns:
        float or pandas.DataFrame
    """
    metrics, modality = _select_metrics(synthetic_data, metrics)
    synthetic_data, real_data, metadata = _validate_arguments(
        synthetic_data, real_data, metadata, root_path, table_name)

    if modality == 'single-table':
        # Unwrap the single table from the dict-based multi-table format.
        table = list(metadata['tables'].keys())[0]
        metadata = metadata['tables'][table]
        real_data = real_data[table]
        synthetic_data = synthetic_data[table]

    scores = sdmetrics.compute_metrics(metrics, real_data, synthetic_data, metadata=metadata)
    scores.dropna(inplace=True)

    if aggregate:
        # Metrics with unbounded ranges cannot be averaged directly with the
        # bounded ones, so squash their scores into (-1, 1) with tanh first.
        infinites = (scores.min_value == -np.inf) & (scores.max_value == np.inf)
        scores.loc[infinites, 'score'] = np.tanh(scores.loc[infinites, 'score'])
        return scores.score.mean()

    return scores
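# A minimal usage sketch of ``evaluate`` above, not part of the module itself.
# The toy DataFrames are illustrative assumptions; the metric names 'CSTest'
# and 'KSTest' are assumed to be available single-table metrics in sdmetrics,
# and any other known metric names would work the same way.
import pandas as pd

real_df = pd.DataFrame({
    'age': [21, 35, 44, 58],
    'dept': ['sales', 'eng', 'eng', 'hr'],
})
synthetic_df = pd.DataFrame({
    'age': [25, 33, 47, 52],
    'dept': ['sales', 'eng', 'hr', 'hr'],
})

# Default aggregate=True: a single float, the mean of all metric scores.
overall_score = evaluate(synthetic_df, real_df, metrics=['CSTest', 'KSTest'])

# aggregate=False: the per-metric scores table, one row per metric.
per_metric_scores = evaluate(
    synthetic_df, real_df, metrics=['CSTest', 'KSTest'], aggregate=False)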
import pandas as pd

from sdmetrics import compute_metrics
from sdmetrics.demos import load_multi_table_demo
from sdmetrics.multi_table import MultiTableMetric


def test_compute_all():
    real_data, synthetic_data, metadata = load_multi_table_demo()

    output = compute_metrics(
        MultiTableMetric.get_subclasses(),
        real_data,
        synthetic_data,
        metadata=metadata,
    )

    # At least one metric must produce a score.
    assert not pd.isna(output.raw_score.mean())

    # Every computed score must fall within the metric's declared bounds.
    scores = output[output.raw_score.notna()]
    assert scores.raw_score.between(scores.min_value, scores.max_value).all()
import pandas as pd

from sdmetrics import compute_metrics
from sdmetrics.demos import load_timeseries_demo
from sdmetrics.timeseries import TimeSeriesMetric


def test_compute_all():
    real_data, synthetic_data, metadata = load_timeseries_demo()

    output = compute_metrics(
        TimeSeriesMetric.get_subclasses(),
        real_data,
        synthetic_data,
        metadata=metadata,
    )

    # At least one metric must produce a score.
    assert not pd.isnull(output.score.mean())

    # Every computed score must fall within the metric's declared bounds.
    scores = output[output.score.notnull()]
    assert scores.score.between(scores.min_value, scores.max_value).all()
import pandas as pd

from sdmetrics import compute_metrics
from sdmetrics.demos import load_single_table_demo
from sdmetrics.single_table import SingleTableMetric


def test_compute_all():
    real_data, synthetic_data, metadata = load_single_table_demo()

    output = compute_metrics(
        SingleTableMetric.get_subclasses(),
        real_data,
        synthetic_data,
        metadata=metadata,
    )

    # At least one metric must produce a score.
    assert not pd.isnull(output.raw_score.mean())

    # Raw scores must fall within each metric's declared bounds.
    scores = output[output.raw_score.notnull()]
    assert scores.raw_score.between(scores.min_value, scores.max_value).all()

    # Normalized scores must always fall within [0, 1].
    scores = output[output.normalized_score.notnull()]
    assert scores.normalized_score.between(0.0, 1.0).all()