Example #1
def benchIdentitySynthesizer():
    import json

    import numpy as np
    from sdgym import benchmark
    from sdgym.constants import CATEGORICAL, ORDINAL
    from sdgym.synthesizers import IdentitySynthesizer

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    with open('generated_metadata.json') as data_file:
        data2 = json.load(data_file)

    categorical_columns = list()
    ordinal_columns = list()

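    # Map each metadata column to an index list by its declared type.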
    for column_idx, column in enumerate(data2['columns']):

        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

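    # IdentitySynthesizer is sdgym's baseline: it simply resamples rows of
    # the data it was fitted on.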
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(300)
    scores = benchmark(synthesizer.fit_sample)
    # Additional runs could be appended, e.g.:
    # scores = scores.append(benchmark(synthesizer.fit_sample))
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    import numpy as np
    from sdgym.data import load_dataset
    from sdgym.synthesizers import IdentitySynthesizer
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    print(sampled)
    np.savetxt('test.txt', sampled, delimiter=',')
    return sampled
def identity_benchmark(json='adult'):
    from sdgym.data import load_dataset
    from sdgym.evaluate import evaluate
    from sdgym.synthesizers import IdentitySynthesizer
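    # benchmark=True returns train/test splits plus the metadata evaluate() needs.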
    train, test, meta, categoricals, ordinals = load_dataset(json, benchmark=True)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)
    print('Sampled Data for 300 records\n')
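    # evaluate() compares the synthetic sample against the held-out test set.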
    scores = evaluate(train, test, sampled, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    return scores
def identity():
    import numpy as np
    import pandas as pd
    from sdgym.synthesizers import IdentitySynthesizer

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    categorical_columns = []
    ordinal_columns = []
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(4000)
    np.savetxt("41_identity.csv", sampled, delimiter=",")

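    # Read the file back so the function returns a pandas DataFrame.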
    data = pd.read_csv('41_identity.csv', header=None)

    return data
def identitySynth():
    import json

    import boto3
    import numpy as np
    from sdgym.constants import CATEGORICAL, ORDINAL
    from sdgym.synthesizers import IdentitySynthesizer

    with open('generated_metadata.json') as data_file:
        data = json.load(data_file)

    categorical_columns = list()
    ordinal_columns = list()

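    # Build index lists of categorical and ordinal columns from the metadata.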
    for column_idx, column in enumerate(data['columns']):

        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    # return categorical_columns, ordinal_columns

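    # Load the numeric training data; down_data.csv is assumed to be a local
    # CSV export with a header row.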
    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(4000)
    np.savetxt("43_identity.csv", sampled, delimiter=",")
    print(sampled)

    print('Data Synthesized using Identity synthesizer')

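    # Credentials are left blank in the original; boto3 can also read them
    # from the environment or ~/.aws/credentials.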
    s3 = boto3.resource(
        's3',
        aws_access_key_id='',
        aws_secret_access_key='',
    )

    s3.Bucket('csye7245-1').upload_file('43_identity.csv',
                                        'synth/43_identity.csv')

    print('Synthesized(Identity) Data Uploaded to S3')
def benchIdentitySynthesizer():
    from sdgym.data import load_dataset
    from sdgym.evaluate import evaluate
    from sdgym.synthesizers import IdentitySynthesizer
    train, test, meta, categoricals, ordinals = load_dataset('adult',
                                                             benchmark=True)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)
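    # The identity baseline effectively replays real rows, so its scores act
    # as a reference point for other synthesizers.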
    scores = evaluate(train, test, sampled, meta)
    #scores = scores.append(evaluate(train, test, sampled, meta))
    #scores = scores.append(evaluate(train, test, sampled, meta))
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    from sdgym.data import load_dataset
    from sdgym.synthesizers import IdentitySynthesizer
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    return sampled
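
# A minimal driver, added for illustration (not part of the original
# examples); it assumes sdgym's bundled 'adult' demo dataset is available.
if __name__ == '__main__':
    scores = identity_benchmark('adult')
    print(scores)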