# Example #1
def benchIdentitySynthesizer():
    """Benchmark the IdentitySynthesizer on down_data.csv and save the scores.

    Reads the numeric data from ``down_data.csv`` and the column metadata
    from ``generated_metadata.json``, classifies each column as categorical
    or ordinal, fits an IdentitySynthesizer, runs the sdgym ``benchmark``
    on it, and writes the resulting scores to ``IdentityBench.csv``.
    """
    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    categorical_columns = []
    ordinal_columns = []

    # Split column indices by declared type so fit() treats them correctly.
    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(300)
    scores = benchmark(synthesizer.fit_sample)
    # BUG FIX: the original appended the bound method ``fit_sample`` itself
    # to the scores DataFrame twice (``scores.append(synthesizer.fit_sample)``),
    # corrupting the results; those appends were removed.
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    """Fit an IdentitySynthesizer on the named sdgym dataset, sample 10
    rows, print and save them to ``test.txt``, and return the sample.

    NOTE(review): the parameter name ``json`` shadows the ``json`` module;
    it is kept unchanged for backward compatibility with keyword callers.
    """
    loaded, cat_cols, ord_cols = load_dataset(json)
    model = IdentitySynthesizer()
    model.fit(loaded, cat_cols, ord_cols)
    rows = model.sample(10)
    print(rows)
    np.savetxt('test.txt', rows, delimiter=',')
    return rows
def identity_benchmark(json='adult'):
    """Benchmark the IdentitySynthesizer on an sdgym dataset.

    Loads train/test splits and metadata for the dataset named by ``json``
    (the parameter shadows the ``json`` module but is kept for backward
    compatibility), fits an IdentitySynthesizer on the training split,
    samples 300 rows, and returns the ``evaluate`` scores.
    """
    train, test, meta, categoricals, ordinals = load_dataset(json, benchmark=True)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)
    print('Sampled Data for 300 records\n')
    scores = evaluate(train, test, sampled, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    # BUG FIX: the header above was printed but the scores themselves
    # never were; print them before returning.
    print(scores)
    return scores
def identity():
    """Sample 4000 rows from an IdentitySynthesizer fitted on
    ``down_data.csv`` (no categorical/ordinal columns), persist them to
    ``41_identity.csv``, and return the saved file re-read as a DataFrame.
    """
    raw = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    model = IdentitySynthesizer()
    model.fit(raw, [], [])

    generated = model.sample(4000)
    np.savetxt("41_identity.csv", generated, delimiter=",")

    # Round-trip through the CSV so the caller sees exactly what was saved.
    return pd.read_csv('41_identity.csv', header=None)
def identitySynth():
    """Synthesize 4000 rows from down_data.csv with the IdentitySynthesizer,
    save them to ``43_identity.csv``, and upload the file to the
    ``csye7245-1`` S3 bucket under ``synth/``.
    """
    import numpy as np
    from sdgym.constants import CATEGORICAL, ORDINAL
    import json
    from sdgym.synthesizers import IdentitySynthesizer
    import boto3

    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    categorical_columns = []
    ordinal_columns = []

    # Split column indices by declared type so fit() treats them correctly.
    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(4000)
    np.savetxt("43_identity.csv", sampled, delimiter=",")
    print(sampled)

    print('Data Synthesized using Identity synthesizer')

    # SECURITY/BUG FIX: the original passed hard-coded empty AWS credentials
    # (aws_access_key_id='' / aws_secret_access_key=''), which always fails
    # authentication.  Use boto3's default credential chain instead
    # (environment variables, shared config, or IAM role).  The unused
    # ConfigParser import was also removed.
    s3 = boto3.resource('s3')

    s3.Bucket('csye7245-1').upload_file('43_identity.csv',
                                        'synth/43_identity.csv')

    print('Synthesized(Identity) Data Uploaded to S3')
def benchIdentitySynthesizer():
    """Benchmark the IdentitySynthesizer on the sdgym 'adult' dataset.

    Fits on the training split, samples 300 rows, evaluates against the
    test split, prints the scores, and writes them to ``IdentityBench.csv``.
    """
    from sdgym.synthesizers import IdentitySynthesizer
    from sdgym.evaluate import evaluate
    from sdgym.data import load_dataset

    train, test, meta, categoricals, ordinals = load_dataset('adult',
                                                             benchmark=True)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)
    scores = evaluate(train, test, sampled, meta)
    # NOTE: two commented-out repeat-evaluate lines were dead code; removed.
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    """Fit an IdentitySynthesizer on the named sdgym dataset and return
    a 10-row sample.

    The parameter name ``json`` shadows the ``json`` module; it is kept
    unchanged for backward compatibility with keyword callers.
    """
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    # BUG FIX: the original ended with a bare ``sampled`` expression,
    # which silently discards the value; return it instead.
    return sampled
# Example #8
    "census",
    "child",
    "covtype",
    "credit",
    "grid",
    "gridr",
    "insurance",
    "intrusion",
    "mnist12",
    "mnist28",
    "news",
    "ring"
]


# Shared synthesizer instance used by the benchmark tests below.
synthesizer = IdentitySynthesizer()


# Manual tests run only when the MANUAL_TESTS env var is set to "true".
MANUAL_TESTS = os.environ.get('MANUAL_TESTS', 'false') == 'true'
MESSAGE = 'set environ variable MANUAL_TESTS="true" to execute'


def test_adult():
    """Run a single-repeat sdgym benchmark of the shared synthesizer on
    the 'adult' dataset."""
    options = dict(repeat=1, datasets=['adult'])
    benchmark(synthesizer.fit_sample, **options)


@unittest.skipUnless(MANUAL_TESTS, MESSAGE)
def test_alarm():
    """Single-repeat benchmark on the 'alarm' dataset; skipped unless
    MANUAL_TESTS is enabled."""
    options = dict(repeat=1, datasets=['alarm'])
    benchmark(synthesizer.fit_sample, **options)

def evaluatefun():
    """Benchmark four synthesizers on down_data.csv and display the
    combined results in Streamlit, highlighting the best one by accuracy.
    """
    from sdgym.synthesizers import IndependentSynthesizer
    import pandas as pd

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    with open('generated_metadata.json') as data_file:
        metadata = json.load(data_file)

    categorical_columns = []
    ordinal_columns = []

    # Split column indices by declared type so fit() treats them correctly.
    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores = benchmark(synthesizer.fit_sample)
    scores['Synth'] = 'IdentitySynthesizer'

    synthesizer = UniformSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores2 = benchmark(synthesizer.fit_sample)
    scores2['Synth'] = 'Uniform'

    synthesizer = IndependentSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores3 = benchmark(synthesizer.fit_sample)
    # BUG FIX: this block benchmarks IndependentSynthesizer but was
    # labelled 'Identity' in the original; label it correctly.
    scores3['Synth'] = 'Independent'

    synthesizer = CLBNSynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    scores4 = benchmark(synthesizer.fit_sample)
    scores4['Synth'] = 'CLBN'

    print('\nEvaluation Scores from evaluate function:\n')

    # pd.concat replaces the deprecated (removed in pandas 2.0)
    # DataFrame.append chain used in the original.
    result = pd.concat([scores, scores2, scores3, scores4])

    best = result[result['accuracy'] == result['accuracy'].max()]

    # Also fixes the 'Synthsizer' typo in the user-facing message.
    st.write('Best Performing Synthesizer: ' + str(best['Synth'].item()))
    st.write('Accuracy: ' + str(best['accuracy'].item()))

    st.dataframe(result)