def independent_benchmark(json='adult'):
    """Benchmark the IndependentSynthesizer on a single dataset.

    Loads the named benchmark dataset, fits an ``IndependentSynthesizer``
    on the training split, samples 300 records and scores them with
    ``evaluate`` against the held-out test split.

    NOTE(review): the parameter name ``json`` shadows the stdlib module;
    it is kept as-is for backward compatibility with keyword callers.

    Returns the scores produced by ``evaluate``.
    """
    train, test, meta, categoricals, ordinals = load_dataset(json, benchmark=True)

    model = IndependentSynthesizer()
    model.fit(train, categoricals, ordinals)

    sampled = model.sample(300)
    print('Sampled Data for 300 records\n')

    scores = evaluate(train, test, sampled, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    return scores
# Example #2
def benchmark(synthesizer, datasets=DEFAULT_DATASETS, repeat=3):
    """Score ``synthesizer`` on each dataset, ``repeat`` times per dataset.

    For every dataset the synthesizer callable is invoked on the training
    split and the output is scored with ``evaluate``; each score frame is
    tagged with the dataset name and the iteration index.

    Returns one DataFrame concatenating all per-run score frames.
    """
    all_scores = []
    for dataset_name in datasets:
        LOGGER.info('Evaluating dataset %s', dataset_name)
        loaded = load_dataset(dataset_name, benchmark=True)
        train, test, meta, categoricals, ordinals = loaded

        for run_index in range(repeat):
            synthetic = synthesizer(train, categoricals, ordinals)
            run_scores = evaluate(train, test, synthetic, meta)
            run_scores['dataset'] = dataset_name
            run_scores['iter'] = run_index
            all_scores.append(run_scores)

    return pd.concat(all_scores)
def benchCLBNSynthesizer():
    """Benchmark CLBNSynthesizer on the 'adult' dataset.

    Fits the synthesizer on the training split, samples 300 records,
    scores them with ``evaluate``, prints the scores, and persists them
    (tagged with the synthesizer name) to ``CLBNBench.csv``.
    """
    # Local imports keep the heavy sdgym dependency out of module import time;
    # grouped and sorted per convention.
    from sdgym.data import load_dataset
    from sdgym.evaluate import evaluate
    from sdgym.synthesizers import CLBNSynthesizer

    train, test, meta, categoricals, ordinals = load_dataset('adult',
                                                             benchmark=True)

    synthesizer = CLBNSynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)

    scores = evaluate(train, test, sampled, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)

    # Tag the rows so CSVs from different synthesizers can be merged later.
    scores['Synth'] = 'CLBNSynthesizer'
    scores.to_csv('CLBNBench.csv')
# Example #4
def benchmark(synthesizer, datasets=DEFAULT_DATASETS, repeat=3, prefix='tmp'):
    """Run ``synthesizer`` over ``datasets``, checkpointing results to disk.

    For each dataset, the synthesizer callable is invoked ``repeat`` times
    and each run's scores (tagged with dataset name and iteration index)
    are accumulated. After every dataset completes, the *cumulative*
    results list is pickled to ``{prefix}_{name}.pickle`` so partial
    progress survives a crash. Datasets that raise ``KeyError`` during
    loading or evaluation are skipped.

    Returns one DataFrame concatenating all per-run score frames.
    """
    print(datasets)
    results = list()
    for name in datasets:
        try:
            # BUG FIX: print() does not do %-interpolation, so the original
            # printed the literal '%s' plus the name as a second argument.
            print('Evaluating dataset %s' % name)
            train, test, meta, categoricals, ordinals = load_dataset(
                name, benchmark=True)

            for iteration in range(repeat):
                synthesized = synthesizer(train, categoricals, ordinals)
                scores = evaluate(train, test, synthesized, meta)
                scores['dataset'] = name
                scores['iter'] = iteration
                results.append(scores)

            print(results)
            # Checkpoint everything accumulated so far; each pickle
            # supersedes the previous one (cumulative, not per-dataset).
            with open(f'{prefix}_{name}.pickle', 'wb') as f:
                pickle.dump(results, f)
        except KeyError:
            print("Here is the KeyError")
            continue

    return pd.concat(results)