def independent_benchmark(json = 'adult'):
    """Fit an ``IndependentSynthesizer`` on one dataset and score 300 samples.

    Args:
        json: Dataset name forwarded to ``load_dataset`` (defaults to
            ``'adult'``).

    Returns:
        Whatever ``evaluate`` returns for the training split, the test split,
        the 300 sampled records and the dataset metadata.
    """
    # load_dataset(..., benchmark=True) yields a five-element tuple.
    dataset = load_dataset(json, benchmark=True)
    train_split, test_split, metadata, categorical_cols, ordinal_cols = dataset

    model = IndependentSynthesizer()
    model.fit(train_split, categorical_cols, ordinal_cols)
    generated = model.sample(300)
    # Only a header is printed here; the sampled records themselves are not.
    print('Sampled Data for 300 records\n')

    result = evaluate(train_split, test_split, generated, metadata)
    # Header only: the scores are handed back to the caller rather than printed.
    print('\nEvaluation Scores from evaluate function:\n')
    return result
def benchmark(synthesizer, datasets=DEFAULT_DATASETS, repeat=3):
    """Run a synthesizer callable against several datasets and collect scores.

    Args:
        synthesizer: Callable invoked as
            ``synthesizer(train, categoricals, ordinals)``; its return value
            is passed to ``evaluate`` as the synthesized data.
        datasets: Iterable of dataset names understood by ``load_dataset``.
        repeat: Number of synthesize/evaluate rounds per dataset.

    Returns:
        The result of ``pd.concat`` over every per-round score object, each
        tagged with ``'dataset'`` and ``'iter'`` entries.
    """
    # NOTE(review): another ``benchmark`` taking an extra ``prefix`` argument
    # appears later in this source; if both live in the same module, the later
    # definition replaces this one.
    collected = []
    for dataset_name in datasets:
        LOGGER.info('Evaluating dataset %s', dataset_name)
        loaded = load_dataset(dataset_name, benchmark=True)
        train_split, test_split, metadata, categorical_cols, ordinal_cols = loaded

        for round_index in range(repeat):
            generated = synthesizer(train_split, categorical_cols, ordinal_cols)
            round_scores = evaluate(train_split, test_split, generated, metadata)
            # Tag the scores so rows stay attributable after concatenation.
            round_scores['dataset'] = dataset_name
            round_scores['iter'] = round_index
            collected.append(round_scores)

    return pd.concat(collected)
def benchCLBNSynthesizer():
    """Score a ``CLBNSynthesizer`` on the ``'adult'`` dataset and save a CSV.

    Fits the synthesizer on the training split, samples 300 records, evaluates
    them, prints the scores, tags them with the synthesizer name under the
    ``'Synth'`` key and writes the result to ``CLBNBench.csv``.

    Returns:
        None. The output is the printed scores and the CSV file.
    """
    from sdgym.synthesizers import CLBNSynthesizer
    from sdgym.evaluate import evaluate
    from sdgym.data import load_dataset

    dataset = load_dataset('adult', benchmark=True)
    train_split, test_split, metadata, categorical_cols, ordinal_cols = dataset

    model = CLBNSynthesizer()
    model.fit(train_split, categorical_cols, ordinal_cols)
    generated = model.sample(300)

    result = evaluate(train_split, test_split, generated, metadata)
    print('\nEvaluation Scores from evaluate function:\n')
    print(result)

    # The tag is added after printing, so it appears only in the CSV output.
    result['Synth'] = 'CLBNSynthesizer'
    result.to_csv('CLBNBench.csv')
def benchmark(synthesizer, datasets=DEFAULT_DATASETS, repeat=3, prefix='tmp'):
    """Benchmark a synthesizer on several datasets, checkpointing to disk.

    For every dataset the synthesizer is run ``repeat`` times; each round's
    scores are tagged with the dataset name and round index. The cumulative
    result list is pickled to ``{prefix}_{name}.pickle`` as rounds complete.
    A dataset that raises ``KeyError`` while loading, synthesizing or
    evaluating is reported and skipped; rounds that finished before the error
    are kept.

    Args:
        synthesizer: Callable invoked as
            ``synthesizer(train, categoricals, ordinals)``; its return value
            is passed to ``evaluate`` as the synthesized data.
        datasets: Iterable of dataset names understood by ``load_dataset``.
        repeat: Number of synthesize/evaluate rounds per dataset.
        prefix: Filename prefix for the pickle checkpoints.

    Returns:
        The result of ``pd.concat`` over every collected score object.

    Raises:
        ValueError: If no scores were collected (empty ``datasets``,
            ``repeat`` of zero, or every dataset skipped).
    """
    print(datasets)
    results = []
    for name in datasets:
        try:
            # print() does not apply logging-style lazy %-formatting, so the
            # dataset name has to be interpolated explicitly.
            print(f'Evaluating dataset {name}')
            train, test, meta, categoricals, ordinals = load_dataset(
                name, benchmark=True)

            for iteration in range(repeat):
                synthesized = synthesizer(train, categoricals, ordinals)
                scores = evaluate(train, test, synthesized, meta)
                scores['dataset'] = name
                scores['iter'] = iteration
                results.append(scores)
                print(results)
                # The checkpoint holds everything gathered so far (all
                # datasets); the file for this dataset is overwritten each time.
                with open(f'{prefix}_{name}.pickle', 'wb') as f:
                    pickle.dump(results, f)
        except KeyError as error:
            # Report which dataset failed and why instead of a fixed message.
            print(f'Skipping dataset {name} after KeyError: {error!r}')

    if not results:
        # pd.concat([]) would raise ValueError as well; keep the exception
        # type but say what actually went wrong.
        raise ValueError(
            'No benchmark results were collected; nothing to concatenate')

    return pd.concat(results)