def test_data_generator_category_and_date():
    """Check incidence of dates and proportional assignment of categories."""
    population_size = 10000
    incidence = 0.2
    return_expectations = {
        "rate": "exponential_increase",
        "incidence": incidence,
        "category": {"ratios": {"A": 0.1, "B": 0.7, "C": 0.2}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    result = generate(population_size, **return_expectations)

    # Rows carrying a date are the generated "events"; their number must
    # equal population * incidence exactly.
    rows_with_date = result[~pd.isnull(result["date"])]
    assert len(rows_with_date) == (population_size * incidence)

    # Categories should be assigned more-or-less in correct proportion
    # (ratios: B 0.7 > C 0.2 > A 0.1).
    counts = {label: len(result[result["category"] == label]) for label in ("A", "B", "C")}
    assert counts["B"] > counts["C"] > counts["A"]
def test_data_generator_age():
    """Ages drawn from the population_ages distribution span young and old."""
    population_size = 10000
    return_expectations = {
        "rate": "universal",
        "int": {"distribution": "population_ages"},
    }
    result = generate(population_size, **return_expectations)
    # A realistic age distribution over 10k people should reach both tails.
    assert result.int.min() < 5 and result.int.max() > 95
def test_data_generator_bool():
    """Boolean values are generated at exactly the requested incidence."""
    population_size = 10000
    incidence = 0.5
    return_expectations = {
        "rate": "exponential_increase",
        "incidence": incidence,
        "bool": True,
    }
    result = generate(population_size, **return_expectations)
    # Missing values count as False (0); the mean must then equal the
    # incidence exactly — presumably `generate` assigns an exact proportion
    # rather than sampling (otherwise this would be flaky).
    observed_rate = result["bool"].fillna(0).mean()
    assert observed_rate == 0.5
def test_data_generator_int():
    """Normally-distributed ints have a sample mean near the requested mean."""
    population_size = 10000
    incidence = 0.9
    return_expectations = {
        "rate": "exponential_increase",
        "incidence": incidence,
        "int": {"distribution": "normal", "mean": 10, "stddev": 1},
    }
    result = generate(population_size, **return_expectations)
    # Sample mean should land within 3 of the requested mean of 10.
    deviation = abs(10 - int(result["int"].mean()))
    assert deviation < 3
def test_data_generator_universal_category():
    """With a universal rate, category ratios drive relative frequencies."""
    population_size = 10000
    return_expectations = {
        "rate": "universal",
        "category": {"ratios": {"rural": 0.1, "urban": 0.9}},
    }
    result = generate(population_size, **return_expectations)
    # urban (0.9) must outnumber rural (0.1).
    frequencies = result.category.value_counts()
    assert frequencies["urban"] > frequencies["rural"]
def test_data_generator_float():
    """Normally-distributed floats have a sample mean near the requested mean."""
    population_size = 10000
    incidence = 0.6
    return_expectations = {
        "rate": "exponential_increase",
        "incidence": incidence,
        "date": {"earliest": "1900-01-01", "latest": "2020-01-01"},
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
    }
    result = generate(population_size, **return_expectations)
    # Sample mean should land within 5 of the requested mean of 35.
    deviation = abs(35 - int(result["float"].mean()))
    assert deviation < 5
def test_data_generator_date():
    """Dates obey the incidence and grow in frequency towards the latest year."""
    population_size = 10000
    incidence = 0.2
    return_expectations = {
        "rate": "exponential_increase",
        "incidence": incidence,
        "date": {"earliest": "1970-01-01", "latest": "2019-12-31"},
    }
    result = generate(population_size, **return_expectations)

    # Rows carrying a date are the generated "events"; their number must
    # equal population * incidence exactly.
    rows_with_date = result[~pd.isnull(result["date"])]
    assert len(rows_with_date) == (population_size * incidence)

    # Count events per year, then walk backwards from the most recent year:
    # under exponential increase each of the five latest yearly counts must
    # be strictly smaller than the one after it.
    year_counts = (
        result["date"].dt.strftime("%Y").reset_index().groupby("date").count()["index"]
    )
    previous_count = population_size
    for count in list(reversed(year_counts))[:5]:
        assert count < previous_count
        previous_count = count
def test_data_generator_date_uniform():
    """Uniform-rate dates obey the incidence and spread evenly over the range."""
    population_size = 100000
    incidence = 0.5
    return_expectations = {
        "rate": "uniform",
        "incidence": incidence,
        "date": {"earliest": "2020-01-01", "latest": "2020-01-11"},
    }
    result = generate(population_size, **return_expectations)

    # Rows carrying a date are the generated "events"; their number must
    # equal population * incidence exactly.
    rows_with_date = result[~pd.isnull(result["date"])]
    assert len(rows_with_date) == (population_size * incidence)

    # With a uniform rate over a 10-day window, each day should receive
    # roughly a tenth of the events (within 10% relative tolerance).
    per_date_counts = result["date"].reset_index().groupby("date").count()["index"]
    expected_per_day = (population_size * incidence) / 10
    for count in per_date_counts:
        assert isclose(count, expected_per_day, rel_tol=0.1)