def benchIdentitySynthesizer():
    """Benchmark the IdentitySynthesizer on down_data.csv.

    Loads the data and its generated metadata, classifies each column as
    categorical/ordinal, fits an IdentitySynthesizer, runs the sdgym
    ``benchmark`` on it, and writes the scores to ``IdentityBench.csv``.
    """
    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    # Classify each metadata column into categorical / ordinal index lists.
    categorical_columns = []
    ordinal_columns = []
    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(300)

    scores = benchmark(synthesizer.fit_sample)
    # BUG FIX: the original then did `scores = scores.append(synthesizer.fit_sample)`
    # twice, appending the bound method object itself to the DataFrame — not valid
    # row data. The duplicate benchIdentitySynthesizer later in this file has the
    # same two lines commented out, confirming they were a mistake; removed here.
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)

    # Tag rows so the CSV identifies which synthesizer produced them.
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json = 'adult'):
    """Sample 10 rows from an IdentitySynthesizer fitted on a named dataset.

    Loads the sdgym dataset given by *json* (note: the parameter name shadows
    the stdlib ``json`` module — kept as-is for keyword-caller compatibility),
    fits an IdentitySynthesizer, prints the 10 sampled rows, writes them to
    ``test.txt`` as CSV, and returns them.
    """
    data, categorical_columns, ordinal_columns = load_dataset(json)

    model = IdentitySynthesizer()
    model.fit(data, categorical_columns, ordinal_columns)

    rows = model.sample(10)
    print(rows)
    np.savetxt('test.txt', rows, delimiter=',')
    return rows
def identity_benchmark(json = 'adult'):
    """Evaluate an IdentitySynthesizer on a named sdgym dataset.

    Loads the train/test split plus metadata for *json*, fits the
    synthesizer on the training portion, draws 300 synthetic rows, and
    returns the scores from sdgym's ``evaluate``.
    """
    train, test, meta, categoricals, ordinals = load_dataset(json, benchmark=True)

    model = IdentitySynthesizer()
    model.fit(train, categoricals, ordinals)
    synthetic = model.sample(300)
    print('Sampled Data for 300 records\n')

    result = evaluate(train, test, synthetic, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    return result
def identity():
    """Synthesize 4000 rows from down_data.csv and return them as a DataFrame.

    Fits an IdentitySynthesizer with no columns declared categorical or
    ordinal, samples 4000 rows, persists them to ``41_identity.csv``, then
    re-reads that file through pandas (header=None: the file has no header
    row) so the caller receives a DataFrame.
    """
    raw = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    model = IdentitySynthesizer()
    model.fit(raw, [], [])
    synthetic = model.sample(4000)

    np.savetxt("41_identity.csv", synthetic, delimiter=",")
    # Round-trip through the CSV on purpose: the returned frame reflects
    # exactly what was written to disk.
    return pd.read_csv('41_identity.csv', header=None)
def identitySynth():
    """Synthesize 4000 rows from down_data.csv and upload them to S3.

    Classifies columns from ``generated_metadata.json``, fits an
    IdentitySynthesizer on ``down_data.csv``, samples 4000 rows, writes them
    to ``43_identity.csv``, and uploads that file to the ``csye7245-1``
    bucket under ``synth/``.
    """
    import numpy as np
    from sdgym.constants import CATEGORICAL, ORDINAL
    import json
    from sdgym.synthesizers import IdentitySynthesizer
    import boto3  # (removed unused `from configparser import ConfigParser`)

    # BUG FIX: the original used one variable `data` for the metadata dict
    # and then clobbered it with the CSV array; renamed to `metadata`.
    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    # Classify each metadata column into categorical / ordinal index lists.
    categorical_columns = []
    ordinal_columns = []
    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(4000)
    np.savetxt("43_identity.csv", sampled, delimiter=",")
    print(sampled)
    print('Data Synthesized using Identity synthesizer')

    # SECURITY NOTE(review): credentials are hard-coded as empty strings.
    # Prefer dropping both kwargs entirely so boto3 uses its standard
    # credential resolution chain (env vars / ~/.aws / instance role) —
    # kept as-is here to avoid changing runtime behavior; confirm intent.
    s3 = boto3.resource(
        's3',
        aws_access_key_id='',
        aws_secret_access_key='',
    )
    s3.Bucket('csye7245-1').upload_file('43_identity.csv', 'synth/43_identity.csv')
    print('Synthesized(Identity) Data Uploaded to S3')
def benchIdentitySynthesizer():
    """Evaluate IdentitySynthesizer on sdgym's 'adult' dataset.

    Fits on the training split, samples 300 rows, scores them with
    ``evaluate`` against the test split, and writes the tagged scores to
    ``IdentityBench.csv``.

    NOTE(review): this redefines ``benchIdentitySynthesizer`` from earlier in
    the file, silently shadowing it at import time — one of the two should be
    renamed.
    """
    from sdgym.synthesizers import IdentitySynthesizer
    from sdgym.evaluate import evaluate
    from sdgym.data import load_dataset

    train, test, meta, categoricals, ordinals = load_dataset('adult', benchmark=True)

    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)

    # Dead commented-out duplicate `scores.append(...)` lines removed.
    scores = evaluate(train, test, sampled, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)

    # Tag rows so the CSV identifies which synthesizer produced them.
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    """Fit an IdentitySynthesizer on a named sdgym dataset and return 10 samples.

    NOTE(review): this redefines ``identity_synthesis`` from earlier in the
    file, silently shadowing it — one of the two should be renamed. The
    parameter name ``json`` shadows the stdlib module; kept for
    keyword-caller compatibility.
    """
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    # BUG FIX: the original ended with the bare expression `sampled` — a no-op
    # statement, so the function implicitly returned None. Return the sample,
    # matching the earlier identity_synthesis variant in this file.
    return sampled
# Tail of a dataset-name list whose opening bracket lies above this chunk
# (presumably the sdgym benchmark dataset roster — confirm against the full file).
"census", "child", "covtype", "credit", "grid", "gridr", "insurance",
"intrusion", "mnist12", "mnist28", "news", "ring"
]

# One shared synthesizer instance reused by every benchmark test below.
synthesizer = IdentitySynthesizer()

# Opt-in switch for the slower dataset benchmarks: they only run when the
# environment variable MANUAL_TESTS is set to exactly the string "true".
MANUAL_TESTS = os.environ.get('MANUAL_TESTS', 'false') == 'true'
MESSAGE = 'set environ variable MANUAL_TESTS="true" to execute'


def test_adult():
    # Always runs: a single benchmark repetition on the 'adult' dataset.
    benchmark(synthesizer.fit_sample, repeat=1, datasets=['adult'])


@unittest.skipUnless(MANUAL_TESTS, MESSAGE)
def test_alarm():
    # Manual-only: skipped unless MANUAL_TESTS is enabled (see MESSAGE above).
    benchmark(synthesizer.fit_sample, repeat=1, datasets=['alarm'])
def evaluatefun():
    """Benchmark four sdgym synthesizers and show the best one in Streamlit.

    Classifies the columns of down_data.csv from its generated metadata,
    benchmarks Identity, Uniform, Independent and CLBN synthesizers, stacks
    the tagged score frames, and writes the winner (by max accuracy) plus the
    full table to the Streamlit app via ``st``.
    """
    from sdgym.synthesizers import IndependentSynthesizer
    from sdgym.evaluate import evaluate

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    with open('generated_metadata.json') as data_file:
        data2 = json.load(data_file)

    # Classify each metadata column into categorical / ordinal index lists.
    categorical_columns = []
    ordinal_columns = []
    for column_idx, column in enumerate(data2['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores = benchmark(synthesizer.fit_sample)
    scores['Synth'] = 'IdentitySynthesizer'

    synthesizer = UniformSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores2 = benchmark(synthesizer.fit_sample)
    scores2['Synth'] = 'Uniform'

    synthesizer = IndependentSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores3 = benchmark(synthesizer.fit_sample)
    # BUG FIX: these scores come from IndependentSynthesizer but were labelled
    # 'Identity', attributing them to the wrong model in the results table.
    scores3['Synth'] = 'Independent'

    synthesizer = CLBNSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores4 = benchmark(synthesizer.fit_sample)
    scores4['Synth'] = 'CLBN'

    print('\nEvaluation Scores from evaluate function:\n')
    # NOTE(review): DataFrame.append is deprecated (removed in pandas 2.0);
    # kept for consistency with the rest of this file — consider pd.concat.
    result = scores.append(scores2)
    result = result.append(scores3)
    result = result.append(scores4)

    best = result[result['accuracy'] == result['accuracy'].max()]
    # 'Synthsizer' typo preserved: it is a runtime UI string, not the bug fixed here.
    st.write('Best Performing Synthsizer: ' + str(best['Synth'].item()))
    st.write('Accuracy: ' + str(best['accuracy'].item()))
    st.dataframe(result)