Example #1
def benchIdentitySynthesizer():
    import json

    import numpy as np
    from sdgym import benchmark
    from sdgym.constants import CATEGORICAL, ORDINAL
    from sdgym.synthesizers import IdentitySynthesizer

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)

    with open('generated_metadata.json') as data_file:
        data2 = json.load(data_file)

    categorical_columns = list()
    ordinal_columns = list()

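    # Map each metadata column to an index list by its declared type.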
    for column_idx, column in enumerate(data2['columns']):

        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

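    # IdentitySynthesizer is sdgym's baseline: it simply resamples rows of
    # the data it was fitted on.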
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(300)
    scores = benchmark(synthesizer.fit_sample)
    # Additional runs could be appended, e.g.:
    # scores = scores.append(benchmark(synthesizer.fit_sample))
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    import numpy as np
    from sdgym.data import load_dataset
    from sdgym.synthesizers import IdentitySynthesizer
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    print(sampled)
    np.savetxt('test.txt', sampled, delimiter=',')
    return sampled
def identity_benchmark(json='adult'):
    from sdgym.data import load_dataset
    from sdgym.evaluate import evaluate
    from sdgym.synthesizers import IdentitySynthesizer
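    # benchmark=True returns train/test splits plus the metadata evaluate() needs.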
    train, test, meta, categoricals, ordinals = load_dataset(json, benchmark=True)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)
    print('Sampled Data for 300 records\n')
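    # evaluate() compares the synthetic sample against the held-out test set.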
    scores = evaluate(train, test, sampled, meta)
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    return scores
def identity():
    import numpy as np
    import pandas as pd
    from sdgym.synthesizers import IdentitySynthesizer

    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    categorical_columns = []
    ordinal_columns = []
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(4000)
    np.savetxt("41_identity.csv", sampled, delimiter=",")

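    # Read the file back so the function returns a pandas DataFrame.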
    data = pd.read_csv('41_identity.csv', header=None)

    return data
def identitySynth():
    import json

    import boto3
    import numpy as np
    from sdgym.constants import CATEGORICAL, ORDINAL
    from sdgym.synthesizers import IdentitySynthesizer

    with open('generated_metadata.json') as data_file:
        data = json.load(data_file)

    categorical_columns = list()
    ordinal_columns = list()

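    # Build index lists of categorical and ordinal columns from the metadata.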
    for column_idx, column in enumerate(data['columns']):

        if column['type'] == CATEGORICAL:
            print(column)
            print('Classified as Categorical')
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)
            print(column)
            print('Classified as Ordinal')

    # return categorical_columns, ordinal_columns

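    # Load the numeric training data; down_data.csv is assumed to be a local
    # CSV export with a header row.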
    data = np.loadtxt('down_data.csv', delimiter=',', skiprows=1)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)

    sampled = synthesizer.sample(4000)
    np.savetxt("43_identity.csv", sampled, delimiter=",")
    print(sampled)

    print('Data Synthesized using Identity synthesizer')

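    # Credentials are left blank in the original; boto3 can also read them
    # from the environment or ~/.aws/credentials.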
    s3 = boto3.resource(
        's3',
        aws_access_key_id='',
        aws_secret_access_key='',
    )

    s3.Bucket('csye7245-1').upload_file('43_identity.csv',
                                        'synth/43_identity.csv')

    print('Synthesized(Identity) Data Uploaded to S3')
def benchIdentitySynthesizer():
    from sdgym.data import load_dataset
    from sdgym.evaluate import evaluate
    from sdgym.synthesizers import IdentitySynthesizer
    train, test, meta, categoricals, ordinals = load_dataset('adult',
                                                             benchmark=True)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(train, categoricals, ordinals)
    sampled = synthesizer.sample(300)
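    # The identity baseline effectively replays real rows, so its scores act
    # as a reference point for other synthesizers.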
    scores = evaluate(train, test, sampled, meta)
    #scores = scores.append(evaluate(train, test, sampled, meta))
    #scores = scores.append(evaluate(train, test, sampled, meta))
    print('\nEvaluation Scores from evaluate function:\n')
    print(scores)
    scores['Synth'] = 'IdentitySynthesizer'
    scores.to_csv('IdentityBench.csv')
def identity_synthesis(json='adult'):
    from sdgym.data import load_dataset
    from sdgym.synthesizers import IdentitySynthesizer
    data, categorical_columns, ordinal_columns = load_dataset(json)
    synthesizer = IdentitySynthesizer()
    synthesizer.fit(data, categorical_columns, ordinal_columns)
    sampled = synthesizer.sample(10)
    return sampled
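
# A minimal driver, added for illustration (not part of the original
# examples); it assumes sdgym's bundled 'adult' demo dataset is available.
if __name__ == '__main__':
    scores = identity_benchmark('adult')
    print(scores)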