def benchIndependentSynthesizer():
    """Benchmark a synthesizer on down_data.csv and write IndependentBench.csv.

    Reads the numeric table from ``down_data.csv`` and the column typing from
    ``generated_metadata.json``, classifies columns as categorical/ordinal,
    fits the synthesizer, runs the benchmark three times, and persists the
    concatenated scores with a ``Synth`` tag column.

    NOTE(review): despite the function name and the 'IndependentSynthesizer'
    tag written to the CSV, this fits a ``UniformSynthesizer`` — confirm which
    synthesizer was intended.
    """
    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    with open('generated_metadata.json') as data_file:
        data2 = json.load(data_file)

    # Partition column indices by declared type from the metadata file.
    categorical_columns = []
    ordinal_columns = []
    for column_idx, column in enumerate(data2['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    synthesizer = UniformSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    # BUG FIX: the original called scores.append(synthesizer.fit_sample),
    # appending the bound *method object* rather than the scores of another
    # benchmark run. Run the benchmark three times and stack the results.
    scores = benchmark(synthesizer.fit_sample)
    scores = scores.append(benchmark(synthesizer.fit_sample))
    scores = scores.append(benchmark(synthesizer.fit_sample))

    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IndependentSynthesizer'
    scores.to_csv('IndependentBench.csv')
def uniform_synthesis(json = 'adult'):
    """Fit a UniformSynthesizer on the named dataset and return 10 samples.

    Also prints the sample and dumps it to ``test.txt`` as CSV.
    (The parameter shadows the builtin ``json`` module name — kept for
    backward compatibility with existing keyword callers.)
    """
    table, categorical_columns, ordinal_columns = load_dataset(json)

    model = UniformSynthesizer()
    model.fit(table, categorical_columns, ordinal_columns)

    rows = model.sample(10)
    print(rows)
    np.savetxt('test.txt', rows, delimiter=',')
    return rows
def uniform_benchmark(json = 'adult'):
    """Evaluate a UniformSynthesizer on the benchmark split of a dataset.

    Loads train/test/meta for ``json``, fits on the train split, samples
    300 rows, and returns the scores from ``evaluate``.
    """
    train, test, meta, categoricals, ordinals = load_dataset(json, benchmark=True)

    model = UniformSynthesizer()
    model.fit(train, categoricals, ordinals)
    generated = model.sample(300)
    print('Sampled Data for 300 records\n')

    result = evaluate(train, test, generated, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    return result
def uniform():
    """Synthesize 4000 rows from down_data.csv with a UniformSynthesizer.

    Writes the sample to ``41_uniform.csv`` and returns it re-read as a
    headerless pandas DataFrame (round-trips through the CSV on disk).
    """
    source = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    # No column typing information here — treat every column as continuous.
    categorical_columns, ordinal_columns = [], []

    model = UniformSynthesizer()
    model.fit(source, categorical_columns, ordinal_columns)
    generated = model.sample(4000)

    np.savetxt("41_uniform.csv", generated, delimiter=",")
    return pd.read_csv('41_uniform.csv', header=None)
def uniformSynth():
    """Synthesize 4000 rows with UniformSynthesizer and upload them to S3.

    Reads column typing from ``generated_metadata.json`` and the raw table
    from ``down_data.csv``, fits the synthesizer, saves the sample as
    ``41_uniform.csv``, then uploads that file to the ``csye7245-1`` bucket.
    """
    import numpy as np
    import json
    import boto3
    from sdgym.constants import CATEGORICAL, ORDINAL
    from sdgym.synthesizers import UniformSynthesizer

    # Classify columns from the generated metadata file.
    with open('generated_metadata.json') as fh:
        meta = json.load(fh)
    categorical_columns = []
    ordinal_columns = []
    for idx, col in enumerate(meta['columns']):
        if col['type'] == CATEGORICAL:
            print(col)
            print('Classified as Categorical')
            categorical_columns.append(idx)
        elif col['type'] == ORDINAL:
            ordinal_columns.append(idx)
            print(col)
            print('Classified as Ordinal')

    table = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    model = UniformSynthesizer()
    model.fit(table, categorical_columns, ordinal_columns)
    generated = model.sample(4000)

    np.savetxt("41_uniform.csv", generated, delimiter=",")
    print(generated)
    print('Data Synthesized using Uniform Synthesizer')

    # NOTE(review): credentials are hard-coded empty strings — they should be
    # loaded from config/environment (see the disabled ConfigParser approach).
    s3 = boto3.resource(
        's3',
        aws_access_key_id='',
        aws_secret_access_key='',
    )
    s3.Bucket('csye7245-1').upload_file('41_uniform.csv', 'synth/41_uniform.csv')
    print('Synthesized(Uniform) Data Uploaded to S3')
def benchUnifrom():
    """Benchmark UniformSynthesizer on 'adult' and write UniformBench.csv.

    Fits on the benchmark train split, samples 300 rows, evaluates against
    the test split, tags the scores with ``Synth='Uniform'`` and persists
    them. (Function name typo is kept — it is the public interface.)
    """
    from sdgym.synthesizers import IndependentSynthesizer, UniformSynthesizer
    from sdgym.evaluate import evaluate
    from sdgym.data import load_dataset

    train, test, meta, categoricals, ordinals = load_dataset('adult', benchmark=True)

    model = UniformSynthesizer()
    model.fit(train, categoricals, ordinals)
    generated = model.sample(300)

    result = evaluate(train, test, generated, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    print(result)

    result['Synth'] = 'Uniform'
    result.to_csv('UniformBench.csv')
def evaluatefun():
    """Benchmark four synthesizers on down_data.csv and report the best.

    Classifies columns from ``generated_metadata.json``, runs the benchmark
    for Identity, Uniform, Independent and CLBN synthesizers, stacks the
    scores, and renders the winner (highest accuracy) via Streamlit.

    NOTE(review): ``.item()`` on the best-row selection raises if two
    synthesizers tie on max accuracy — acceptable for a demo, flagging only.
    """
    from sdgym.synthesizers import IndependentSynthesizer
    from sdgym.evaluate import evaluate

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    with open('generated_metadata.json') as data_file:
        data2 = json.load(data_file)

    # Partition column indices by declared type from the metadata file.
    categorical_columns = []
    ordinal_columns = []
    for column_idx, column in enumerate(data2['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores = benchmark(synthesizer.fit_sample)
    scores['Synth'] = 'IdentitySynthesizer'

    synthesizer = UniformSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores2 = benchmark(synthesizer.fit_sample)
    scores2['Synth'] = 'Uniform'

    synthesizer = IndependentSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores3 = benchmark(synthesizer.fit_sample)
    # BUG FIX: these scores come from IndependentSynthesizer but were tagged
    # 'Identity', colliding with the IdentitySynthesizer run above and
    # corrupting the best-synthesizer report.
    scores3['Synth'] = 'Independent'

    synthesizer = CLBNSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores4 = benchmark(synthesizer.fit_sample)
    scores4['Synth'] = 'CLBN'

    print('\nEvaluation Scores from evaluate function:\n')
    result = scores.append(scores2)
    result = result.append(scores3)
    result = result.append(scores4)

    best = result[result['accuracy'] == result['accuracy'].max()]
    st.write('Best Performing Synthsizer: ' + str(best['Synth'].item()))
    st.write('Accuracy: ' + str(best['accuracy'].item()))
    st.dataframe(result)
def uniform_synthesis(json='adult'):
    """Fit a UniformSynthesizer on the named dataset and return 10 samples.

    (The parameter shadows the builtin ``json`` module name — kept for
    backward compatibility with existing keyword callers.)
    """
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = UniformSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    # BUG FIX: the function previously ended with the bare expression
    # `sampled`, a no-op statement, so it always returned None.
    return sampled