Beispiel #1
0
def test_correct_number_of_rows_are_generated():
    """Generate synthetic client rows and check the requested row count.

    The generated ``sic_range``, ``sic`` and ``country`` cells are objects;
    they are flattened to their ``name`` / ``alpha3_code`` attributes so the
    frame only holds plain values before the row count is verified.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'region':
            gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                       weights=[0.1, 0.1, 0.3, 0.5]),
            'sic_range':
            gen.sic_range(),
            'sic':
            gen.sic_industry(sic_range_field='sic_range'),
            'country':
            gen.country_codes(region_field='region'),
            'client_name':
            gen.company_namer(field='sic',
                              field_type='sic',
                              countrycode_field='country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Flatten the object-valued columns; this also proves every cell exposes
    # the attribute being read.
    df['sic_range'] = df['sic_range'].apply(lambda x: x.name)
    df['sic'] = df['sic'].apply(lambda x: x.name)
    df['country'] = df['country'].apply(lambda x: x.alpha3_code)

    # The test previously only printed the frame and could never fail on a
    # wrong row count, despite its name.
    assert len(df) == expected_rows
def test_countries_are_in_nam_target_region():
    """Every country generated for the 'NAM' region is a known NAM country.

    The region column is pinned to 'NAM', so each generated country must be
    a member of ``data.countries(region="NAM")``.
    """
    props = {
        "region": gen.choice(['NAM']),
        "country": gen.country_codes(region_field="region"),
    }
    seeded = np.random.RandomState(13031981)
    generated = gen.generate(props=props, count=100, randomstate=seeded)
    result = generated.to_dataframe()

    nam_countries = data.countries(region="NAM")

    assert all(country in nam_countries for country in result['country'])
def test_correct_number_of_rows_are_generated():
    """Generate rows with a region, country and contact name, then check the count.

    ``country`` is derived from ``region`` and ``contact_name`` from
    ``country`` via the ``*_field`` arguments.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'region':
            gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                       weights=[0.1, 0.1, 0.3, 0.5]),
            "country":
            gen.country_codes(region_field='region'),
            "contact_name":
            gen.person(country_field='country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Previously the frame was built and discarded without any check.
    assert len(df) == expected_rows
Beispiel #4
0
def run(seed=130319810):
    """Generate 50 synthetic legal-entity rows, print them and return the frame.

    Args:
        seed: Seed for the ``np.random.RandomState`` driving the generation.

    Returns:
        The generated DataFrame, with both country columns reduced to their
        ``alpha3_code`` and a currency column added for each of them.
    """
    regions = ['NAM', 'EMEA', 'APAC', 'LATAM']
    region_weights = [0.5, 0.3, 0.1, 0.1]

    # Keys are kept in the original insertion order; dependent fields refer
    # to earlier ones by name.
    props = {
        'region': gen.choice(data=regions, weights=region_weights),
        'country': gen.country_codes(region_field='region'),
        'secondary-region': gen.choice(data=regions, weights=region_weights),
        'secondary-country': gen.country_codes(
            region_field='secondary-region'),
        'industry': gen.sic_range(),
        'industry_code': gen.sic_industry('industry'),
        'legal-name': gen.company_namer(field='industry_code',
                                        countrycode_field='country'),
        'lei_code': gen.lei_code(),
    }
    generated = gen.generate(props=props,
                             count=50,
                             randomstate=np.random.RandomState(seed))
    df = generated.to_dataframe()

    # Read the currency off each country object first, then replace the
    # object with its alpha-3 code.
    country_to_ccy = (
        ('country', 'prefered_ccy'),
        ('secondary-country', 'secondary_ccy'),
    )
    for country_col, ccy_col in country_to_ccy:
        df[ccy_col] = df[country_col].apply(lambda c: c.currency)
        df[country_col] = df[country_col].apply(lambda c: c.alpha3_code)

    print(df)
    return df
Beispiel #5
0
def test_correct_number_of_rows_are_generated():
    """Generate client rows named from a client type and check the row count.

    ``client_type`` is drawn from ``data.client_types()`` and feeds
    ``gen.company_namer`` together with the generated country.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'region':
            gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                       weights=[0.1, 0.1, 0.3, 0.5]),
            "country":
            gen.country_codes(region_field='region'),
            "client_type":
            gen.choice(data=data.client_types()),
            "client_name":
            gen.company_namer(field='client_type',
                              field_type='client_type',
                              countrycode_field='country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Previously the frame was built and discarded without any check.
    assert len(df) == expected_rows
def test_address_generation():
    """Generate 100 addresses and flatten each address object into columns.

    Reading ``address_1``, ``address_2``, ``city``, ``state`` and
    ``postal_code`` from every generated address checks that each object
    exposes those attributes; the object column is dropped afterwards.
    """
    df = gen.generate(
        props={
            'region': gen.choice(['NAM', 'EMEA', 'APAC', 'LATAM']),
            'country': gen.country_codes(region_field='region'),
            'address': gen.address('country')
        },
        count=100,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Post-process: the address is an object, so expand it into flat columns.
    df['country'] = df['country'].apply(lambda x: x.alpha2_code)
    df['address_1'] = df['address'].apply(lambda x: x.address_1)
    df['address_2'] = df['address'].apply(lambda x: x.address_2)
    df['city'] = df['address'].apply(lambda x: x.city)
    df['state'] = df['address'].apply(lambda x: x.state)
    # Column name fixed: it was misspelled as 'postal)code'.
    df['postal_code'] = df['address'].apply(lambda x: x.postal_code)

    del df['address']
Beispiel #7
0
def test_normal_sampler_bound():
    """Plot the country distribution produced with the ``bound_normal`` sampler.

    Generates 100000 country values, reduces them to ``alpha2_code`` and
    draws the per-country counts as a bar chart with a line overlay on the
    same axes. There are no assertions; the result is inspected visually.
    """
    props = {'country': gen.country_codes(sampler=gen.bound_normal)}
    seeded = np.random.RandomState(13031981)
    df = gen.generate_from_model(props=props,
                                 count=100000,
                                 randomstate=seeded).to_dataframe()

    df['country'] = df['country'].map(lambda c: c.alpha2_code)

    bar_axes = df['country'].value_counts().plot(kind='bar')

    # NOTE(review): x='month' looks like a copy-paste leftover; the data
    # being plotted is a per-country count. Confirm before removing.
    overlay_options = dict(x='month', linestyle='-', marker='o', ax=bar_axes)
    ax = df['country'].value_counts().plot(**overlay_options)

    ax.set_xlabel("Country")
    ax.set_ylabel("Count")
    # NOTE(review): plt.show() presumably blocks under an interactive
    # matplotlib backend; confirm this is intended for an automated run.
    plt.show()
Beispiel #8
0
def test_correct_number_of_rows_are_generated():
    """Generate synthetic P&L rows, derive extra columns and check the count.

    After generation the country object is split into a currency column and
    its ``alpha3_code``, and ``trade_year`` is expanded into a first-of-January
    date string.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'firm_account':
            gen.choice(data=['A', 'B', 'C']),
            'region':
            gen.choice(data=['NAM', 'EMEA', 'LATAM', 'APAC']),
            'country':
            gen.country_codes(region_field="region"),
            'intraday_pnl':
            gen.random_range(low=-1000, high=1000, round_dp=2),
            'trade_year':
            gen.choice(data=[
                2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019
            ])
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # The currency must be read before 'country' is overwritten with its code.
    df['ccy'] = df['country'].apply(lambda x: x.currency)
    df['country'] = df['country'].apply(lambda x: x.alpha3_code)
    df['trade_date'] = df['trade_year'].apply(lambda x: f"{x}-01-01")

    # Previously nothing was asserted, so the test could not detect a wrong
    # row count.
    assert len(df) == expected_rows
import numpy as np
import datahub_core.generators as gen
from datahub_core.models import MarkovModel
# Module-level fixtures: a seeded random state, a Markov model loaded from
# the test data file, and one generated result set reused by the tests below.
RS = np.random.RandomState(13031981)

MODEL = MarkovModel(filename='./tests/client_data.json', randomstate=RS)

# 'region' and 'client_type' are referenced by the generators below but are
# not declared in props; presumably the model supplies them (confirm against
# client_data.json).
RESULT = gen.generate_from_model(
    props={
        "country":
        gen.country_codes(region_field='region'),
        # Bounds were swapped (low=10000000, high=100000); low must not
        # exceed high.
        "ev":
        gen.random_range(low=100000, high=10000000),
        "address":
        gen.address(country_field='country'),
        "contact_name":
        gen.person(country_field='country'),
        "client_name":
        gen.company_namer(field='client_type',
                          field_type='client_type',
                          countrycode_field='country')
    },
    count=50,
    model=MODEL)


def test_correct_number_of_rows_are_generated():
    """Flatten the model-generated result and check it has the requested rows.

    ``RESULT`` is generated at module level with ``count=50``.
    """
    df = RESULT.to_dataframe()

    # Remap objects into a flat table.
    df['country'] = df['country'].map(lambda x: x.alpha3_code)
    df['city'] = df['address'].map(lambda x: x.city)

    # Previously nothing was asserted, despite the test's name.
    assert len(df) == 50