Example #1
0
def benchIndependentSynthesizer():
    """Benchmark the independent synthesizer and write the scores to CSV.

    Steps:
      1. Load ``down_data.csv`` (first row skipped) as a numeric array.
      2. Read ``generated_metadata.json`` and collect the positions of the
         columns whose ``type`` is CATEGORICAL or ORDINAL.
      3. Fit the synthesizer on the loaded data.
      4. Call ``benchmark(synthesizer.fit_sample)`` three times and stack
         the returned score frames.
      5. Print the stacked scores, tag them with a ``Synth`` column and save
         them to ``IndependentBench.csv``.

    Returns:
        None. The results are printed and written to disk.
    """
    # Function-scope imports, as the other helpers in this file do.
    import pandas as pd
    from sdgym.synthesizers import IndependentSynthesizer

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    categorical_columns = []
    ordinal_columns = []

    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    # The function name, the 'Synth' label and the output file all refer to
    # the independent synthesizer; the previous code instantiated
    # UniformSynthesizer, so the saved scores were mislabelled.
    synthesizer = IndependentSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    # The previous code appended the bound method ``synthesizer.fit_sample``
    # itself to the score frame, which fails inside pandas. Presumably the
    # intent was three benchmark runs stacked together (TODO confirm), so
    # run the benchmark three times and concatenate the resulting frames.
    runs = [benchmark(synthesizer.fit_sample) for _ in range(3)]
    scores = pd.concat(runs)

    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IndependentSynthesizer'
    scores.to_csv('IndependentBench.csv')
def uniform_synthesis(json='adult'):
    """Fit a UniformSynthesizer on a named dataset and draw ten rows.

    Args:
        json: Dataset name forwarded to ``load_dataset``. Note that inside
            this function the parameter hides any module-level ``json``.

    Returns:
        The ten sampled rows, which are also printed and written to
        ``test.txt`` as comma-separated values.
    """
    table, categoricals, ordinals = load_dataset(json)

    model = UniformSynthesizer()
    model.fit(table, categoricals, ordinals)

    rows = model.sample(10)
    print(rows)
    np.savetxt('test.txt', rows, delimiter=',')
    return rows
def uniform_benchmark(json='adult'):
    """Score 300 UniformSynthesizer samples against a benchmark dataset.

    Args:
        json: Dataset name forwarded to ``load_dataset`` with
            ``benchmark=True``.

    Returns:
        Whatever ``evaluate`` returns for the train split, test split,
        sampled rows and dataset metadata.
    """
    train_split, test_split, metadata, cat_cols, ord_cols = load_dataset(
        json, benchmark=True
    )

    model = UniformSynthesizer()
    model.fit(train_split, cat_cols, ord_cols)
    synthetic = model.sample(300)

    # Only the two banner lines are printed; the scores themselves are
    # handed back to the caller.
    print('Sampled Data for 300 records\n')
    result = evaluate(train_split, test_split, synthetic, metadata)
    print('\nEvaluation Scores from evaluate function:\n')
    return result
def uniform():
    """Synthesize 4000 rows from ``down_data.csv`` and reload them via pandas.

    The source file is read with its first row skipped, a UniformSynthesizer
    is fitted with no categorical or ordinal columns, and the 4000 sampled
    rows are written to ``41_uniform.csv``.

    Returns:
        The contents of ``41_uniform.csv`` read back with ``pd.read_csv``
        and no header row.
    """
    source = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    model = UniformSynthesizer()
    # Every column is treated as neither categorical nor ordinal.
    model.fit(source, [], [])

    synthetic = model.sample(4000)
    np.savetxt("41_uniform.csv", synthetic, delimiter=",")

    return pd.read_csv('41_uniform.csv', header=None)
def uniformSynth():
    """Synthesize 4000 rows with UniformSynthesizer and upload them to S3.

    Column roles are read from ``generated_metadata.json``; the training data
    comes from ``down_data.csv`` (first row skipped). The sampled rows are
    saved to ``41_uniform.csv``, printed, and uploaded to the ``csye7245-1``
    bucket under the key ``synth/41_uniform.csv``.

    Returns:
        None.
    """
    import numpy as np
    from sdgym.constants import CATEGORICAL, ORDINAL
    import json
    from sdgym.synthesizers import UniformSynthesizer
    import boto3

    # An earlier revision read the AWS access key, secret key and session
    # token from the 'aws' section of config.ini via ConfigParser; that code
    # path is disabled.

    with open('generated_metadata.json') as handle:
        metadata = json.load(handle)

    categorical_columns = []
    ordinal_columns = []

    # Record the position of every categorical / ordinal column; columns of
    # any other type are skipped.
    for position, spec in enumerate(metadata['columns']):
        kind = spec['type']
        if kind == CATEGORICAL:
            print(spec)
            print('Classified as Categorical')
            categorical_columns.append(position)
        elif kind == ORDINAL:
            ordinal_columns.append(position)
            print(spec)
            print('Classified as Ordinal')

    training = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    model = UniformSynthesizer()
    model.fit(training, categorical_columns, ordinal_columns)

    synthetic = model.sample(4000)
    np.savetxt("41_uniform.csv", synthetic, delimiter=",")
    print(synthetic)

    print('Data Synthesized using Uniform Synthesizer')

    # NOTE(review): both credential strings are blank here - presumably
    # redacted; confirm how credentials are meant to be supplied.
    s3 = boto3.resource('s3', aws_access_key_id='', aws_secret_access_key='')
    bucket = s3.Bucket('csye7245-1')
    bucket.upload_file('41_uniform.csv', 'synth/41_uniform.csv')

    print('Synthesized(Uniform) Data Uploaded to S3')
def benchUnifrom():
    """Evaluate UniformSynthesizer on the 'adult' dataset and save the scores.

    Fits on the training split, samples 300 rows, scores them with
    ``evaluate``, prints the scores, adds a ``Synth`` column set to
    ``'Uniform'`` and writes the result to ``UniformBench.csv``.

    Returns:
        None.
    """
    from sdgym.synthesizers import IndependentSynthesizer, UniformSynthesizer
    from sdgym.evaluate import evaluate
    from sdgym.data import load_dataset

    train_split, test_split, metadata, cat_cols, ord_cols = load_dataset(
        'adult', benchmark=True
    )

    model = UniformSynthesizer()
    model.fit(train_split, cat_cols, ord_cols)
    synthetic = model.sample(300)

    # A single evaluation pass; repeated passes were once appended here but
    # are disabled.
    results = evaluate(train_split, test_split, synthetic, metadata)

    print('\nEvaluation Scores from evaluate function:\n')
    print(results)

    results['Synth'] = 'Uniform'
    results.to_csv('UniformBench.csv')
def evaluatefun():
    """Benchmark four synthesizers and report the best one in Streamlit.

    Loads ``down_data.csv`` (first row skipped) and
    ``generated_metadata.json``, collects the positions of the CATEGORICAL
    and ORDINAL columns, then for each of the Identity, Uniform, Independent
    and CLBN synthesizers: fits it on the loaded data, runs
    ``benchmark(synthesizer.fit_sample)`` and tags the returned scores with
    a ``Synth`` label. The combined table is shown with ``st.dataframe``,
    preceded by the label and accuracy of the highest-accuracy row.

    Returns:
        None. Output goes to stdout and to the Streamlit page.
    """
    # Function-scope imports, as the other helpers in this file do.
    import pandas as pd
    from sdgym.synthesizers import IndependentSynthesizer

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    categorical_columns = []
    ordinal_columns = []

    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    # (label, class) pairs, benchmarked in this order. The Independent entry
    # used to be labelled 'Identity', which made its scores look like a
    # second Identity run in the results table.
    candidates = [
        ('IdentitySynthesizer', IdentitySynthesizer),
        ('Uniform', UniformSynthesizer),
        ('Independent', IndependentSynthesizer),
        ('CLBN', CLBNSynthesizer),
    ]

    score_frames = []
    for label, synthesizer_cls in candidates:
        synthesizer = synthesizer_cls()
        synthesizer.fit(data, categorical_columns, ordinal_columns)
        scores = benchmark(synthesizer.fit_sample)
        scores['Synth'] = label
        score_frames.append(scores)

    print('\nEvaluation Scores from evaluate function:\n')

    # pd.concat stacks the frames exactly as chained DataFrame.append did,
    # and still exists in pandas 2.x where DataFrame.append was removed.
    result = pd.concat(score_frames)

    # Several rows can share the maximum accuracy; take the first of them
    # instead of calling .item(), which raises unless exactly one row
    # matches.
    top_rows = result[result['accuracy'] == result['accuracy'].max()]
    best = top_rows.iloc[0]

    st.write('Best Performing Synthsizer: ' + str(best['Synth']))
    st.write('Accuracy: ' + str(best['accuracy']))

    st.dataframe(result)
def uniform_synthesis(json='adult'):
    """Fit a UniformSynthesizer on a named dataset and return ten samples.

    Args:
        json: Dataset name forwarded to ``load_dataset``. Note that inside
            this function the parameter hides any module-level ``json``.

    Returns:
        The ten rows produced by ``synthesizer.sample(10)``.
    """
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = UniformSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    # The body used to end with a bare ``sampled`` expression, which only
    # displays a value in a notebook cell; inside a function it is discarded
    # and the caller receives None. Return the sample explicitly.
    return sampled