Exemple #1
0
def create_pymc3_getting_started_model(modelname='pymc3_getting_started_model',
                                       fit=True):
    if fit:
        modelname = modelname + '_fitted'
    np.random.seed(123)
    alpha, sigma = 1, 1
    beta_0 = 1
    beta_1 = 2.5
    size = 100
    X1 = np.random.randn(size)
    X2 = np.random.randn(size) * 0.2
    Y = alpha + beta_0 * X1 + beta_1 * X2 + np.random.randn(size) * sigma
    data = pd.DataFrame({'X1': X1, 'X2': X2, 'Y': Y})

    basic_model = pm.Model()

    with basic_model:
        # Priors for unknown model parameters
        alpha = pm.Normal('alpha', mu=0, sd=10)
        beta_0 = pm.Normal('beta_0', mu=0, sd=10)
        beta_1 = pm.Normal('beta_1', mu=0, sd=10)
        sigma = pm.HalfNormal('sigma', sd=5)

        # Expected value of outcome
        mu = alpha + beta_0 * data['X1'] + beta_1 * data['X2']

        # Likelihood (sampling distribution) of observations
        Y = pm.Normal('Y', mu=mu, sd=sigma, observed=data['Y'])
        X1 = pm.Normal('X1', mu=data['X1'], sd=sigma, observed=data['X1'])
        X2 = pm.Normal('X2', mu=data['X2'], sd=sigma, observed=data['X2'])

        m = ProbabilisticPymc3Model(modelname, basic_model)
        if fit:
            m.fit(data)
        return data, m
Exemple #2
0
def coal_mining_desaster(modelname='pymc3_coal_mining_disaster_model'):
    """TODO: Documentation here. why is there two returned models? what for?"""
    # data
    disasters = np.array([
        4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4,
        4, 1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 3, 2, 1, 1, 1, 1, 3, 0, 0,
        1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2,
        1, 0, 0, 0, 1, 1, 0, 2, 3, 3, 1, 2, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
    ])
    years = np.arange(1851, 1962)
    data = pd.DataFrame({'years': years, 'disasters': disasters})
    years = theano.shared(years)

    with pm.Model() as disaster_model:
        switchpoint = pm.DiscreteUniform('switchpoint',
                                         lower=years.min(),
                                         upper=years.max(),
                                         testval=1900)

        # Priors for pre- and post-switch rates number of disasters
        early_rate = pm.Exponential('early_rate', 1.0)
        late_rate = pm.Exponential('late_rate', 1.0)

        # Allocate appropriate Poisson rates to years before and after current
        rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=data['disasters'])
        # years = pm.Normal('years', mu=data['years'], sd=0.1, observed=data['years'])

    model = ProbabilisticPymc3Model(modelname,
                                    disaster_model,
                                    shared_vars={'years': years})
    model_fitted = model.copy(name=modelname + '_fitted').fit(data)
    return model, model_fitted
Exemple #3
0
def create_allbus_model_5(filename='test_allbus.csv',
                          modelname='allbus_model_5',
                          fit=True):
    if fit:
        modelname = modelname + '_fitted'
    # Load and prepare data
    data = pd.read_csv(filename, index_col=0)
    data = data.drop(
        ['eastwest', 'lived_abroad', 'spectrum', 'sex', 'educ', 'health'],
        axis=1)
    # Reduce size of data to improve performance
    data = data.sample(n=500, random_state=1)
    data.sort_index(inplace=True)
    # Set up shared variables
    age = theano.shared(np.array(data['age']))
    allbus_model = pm.Model()
    with allbus_model:
        # priors
        alpha_sd = pm.Uniform('alpha_sd', 0, 2000)
        beta_sd = pm.Uniform('beta_sd', 0, 1000)
        loc_transform = pm.Normal('loc_transform', 50, 20)
        scale_transform = pm.Uniform('scale_transform', 0, 100)
        sd_happ = pm.Uniform('sd_happ', 0, 5)
        k_logistic = pm.Uniform('k_logistic', 0, 10)
        x_0_logistic = pm.Uniform('x_0_logistic', 0, 1000)

        # likelihood
        # transform age so that it resembles a bell-shaped distribution
        def normal_pdf(x, loc, scale):
            return 1 / np.sqrt(2 * math.pi * scale**2) * np.exp(-(x - loc)**2 /
                                                                (2 * scale**2))

        age_transformed = normal_pdf(age,
                                     loc=loc_transform,
                                     scale=scale_transform)
        sd_income = alpha_sd + beta_sd * age_transformed
        age + loc_transform
        income = pm.HalfNormal('income', sd_income, observed=data['income'])

        def logistic_func(l, k, x, x_0):
            # l: curve's max value
            # x_0: x_value of sigmoid's mid-point
            # k: steepness of the curve
            return l / (1 + np.exp(-k * (x - x_0)))

        mu_happ = logistic_func(10, k_logistic, income, x_0_logistic)

        happiness = pm.Normal('happiness',
                              mu_happ,
                              sd_happ,
                              observed=data['happiness'])

    # Create model instance for Lumen
    m = ProbabilisticPymc3Model(modelname,
                                allbus_model,
                                shared_vars={'age': age})
    if fit:
        m.fit(data)
    return data, m
Exemple #4
0
def create_allbus_model_4(filename='test_allbus.csv',
                          modelname='allbus_model_4',
                          fit=True):
    if fit:
        modelname = modelname + '_fitted'
    # Load and prepare data
    data = pd.read_csv(filename, index_col=0)
    data = data.drop(
        ['eastwest', 'lived_abroad', 'spectrum', 'sex', 'educ', 'health'],
        axis=1)
    # Reduce size of data to improve performance
    data = data.sample(n=500, random_state=1)
    data.sort_index(inplace=True)
    # Set up shared variables
    age = theano.shared(np.array(data['age']))
    allbus_model = pm.Model()
    with allbus_model:
        # priors
        alpha_sd = pm.Uniform('alpha_sd', 0, 2000)
        beta_sd = pm.Uniform('beta_sd', 0, 1000)
        loc_transform = pm.Normal('loc_transform', 50, 20)
        scale_transform = pm.Uniform('scale_transform', 0, 100)
        sd_happ = pm.Uniform('sd_happ', 0, 5)
        alpha_happ1 = pm.Uniform('alpha_happ1', -10, 5)
        alpha_happ2 = pm.Uniform('alpha_happ2', 5, 10)
        beta_happ1 = pm.Uniform('beta_happ1', -0.001, 0.001)
        beta_happ2 = pm.Uniform('beta_happ2', -0.001, 0.001)

        # likelihood
        # transform age so that it resembles a bell-shaped distribution
        def normal_pdf(x, loc, scale):
            return 1 / np.sqrt(2 * math.pi * scale * scale) * np.exp(
                -(x - loc)**2 / (2 * scale * scale))

        age_transformed = normal_pdf(age,
                                     loc=loc_transform,
                                     scale=scale_transform)
        sd_income = alpha_sd + beta_sd * age_transformed
        income = pm.HalfNormal('income', sd_income, observed=data['income'])
        switchpoint = pm.Uniform('switchpoint', 500, 2000)
        beta_happ = pm.math.switch(income < switchpoint, beta_happ1,
                                   beta_happ2)
        alpha_happ = pm.math.switch(income < switchpoint, alpha_happ1,
                                    alpha_happ2)
        mu_happ = alpha_happ + beta_happ * income
        # Cap happiness at 10
        mu_happ = pm.math.switch(mu_happ < 10, mu_happ, 10)
        happiness = pm.Normal('happiness',
                              mu_happ,
                              sd_happ,
                              observed=data['happiness'])

    # Create model instance for Lumen
    m = ProbabilisticPymc3Model(modelname,
                                allbus_model,
                                shared_vars={'age': age})
    if fit:
        m.fit(data)
    return data, m
Exemple #5
0
def create_example_model(modelname='my_name', fit=True):
    if fit:
        modelname = modelname + '_fitted'
    ## Load data as pandas df
    #data = pd.read_csv(...)
    example_model = pm.Model()
    ## Specify  your model
    # with example_model:
    # ...
    m = ProbabilisticPymc3Model(modelname, example_model)
    if fit:
        m.fit(data)
    return data, m
Exemple #6
0
def create_flight_delay_model_2(filename='airlineDelayDataProcessed.csv',
                                modelname='flight_delay_2',
                                fit=True):
    if fit:
        modelname = modelname + '_fitted'

    data = pd.read_csv(filename)
    data = data.rename(columns={
        'DEP_TIME': 'dep_time',
        'DEP_DELAY': 'depdelay'
    })

    # Drop variables that are not considered in the model
    data = data.drop([
        'UNIQUE_CARRIER', 'DAY_OF_MONTH', 'DAY_OF_WEEK', 'ORIGIN_AIRPORT_ID',
        'DEST_AIRPORT_ID', 'ACTUAL_ELAPSED_TIME', 'ARR_DELAY', 'DISTANCE'
    ],
                     axis=1)

    # Reduce size of data to improve performance
    data = data.sample(n=1000, random_state=1)
    data.sort_index(inplace=True)

    # Create shared variables
    deptime = theano.shared(np.array(data['dep_time']))

    # Create model
    delay_model = pm.Model()

    with delay_model:
        beta_dep = pm.Uniform('beta_dep', 0, 1, shape=2)
        beta_var = pm.Uniform('beta_var', 0, 1, shape=2)
        # Improvement 1: Assume that variance is a linear function of time, instead of uniformly distributed
        var = beta_var[0] + beta_var[1] * deptime
        # I assume that depdelay is a function of deptime
        mu_depdelay = beta_dep[0] + beta_dep[1] * deptime
        depdelay = pm.Normal('depdelay',
                             mu_depdelay,
                             var,
                             observed=data['depdelay'])

    m = ProbabilisticPymc3Model(modelname,
                                delay_model,
                                shared_vars={'dep_time': deptime})
    if fit:
        m.fit(data)
    return data, m
Exemple #7
0
def create_allbus_model_1(filename='test_allbus.csv',
                          modelname='allbus_model_1',
                          fit=True):
    if fit:
        modelname = modelname + '_fitted'
    # Load and prepare data
    data = pd.read_csv(filename, index_col=0)
    data = data.drop(
        ['eastwest', 'lived_abroad', 'spectrum', 'sex', 'educ', 'health'],
        axis=1)
    # Reduce size of data to improve performance
    data = data.sample(n=500, random_state=1)
    data.sort_index(inplace=True)
    # Set up shared variables
    age = theano.shared(np.array(data['age']))
    allbus_model = pm.Model()
    with allbus_model:
        # priors
        sd_income = pm.Uniform('sd_income', 0, 1000)
        alpha_inc = pm.Uniform('alpha_inc', -1000, 1000)
        beta_inc = pm.Uniform('beta_inc', -100, 100)
        sd_happ = pm.Uniform('sd_happ', 0, 5)
        alpha_happ = pm.Uniform('alpha_happ', -10, 10)
        beta_happ = pm.Uniform('beta_happ', -0.001, 0.001)
        # likelihood
        mu_income = alpha_inc + beta_inc * age
        income = pm.Normal('income',
                           mu_income,
                           sd_income,
                           observed=data['income'])
        mu_happ = alpha_happ + beta_happ * income
        happiness = pm.Normal('happiness',
                              mu_happ,
                              sd_happ,
                              observed=data['happiness'])

    # Create model instance for Lumen
    m = ProbabilisticPymc3Model(modelname,
                                allbus_model,
                                shared_vars={'age': age})
    if fit:
        m.fit(data)
    return data, m
Exemple #8
0
def create_pymc3_simplest_model(modelname='pymc3_simplest_model', fit=True):
    if fit:
        modelname = modelname + '_fitted'
    np.random.seed(2)
    size = 100
    mu = np.random.normal(0, 1, size=size)
    sigma = 1
    X = np.random.normal(mu, sigma, size=size)
    data = pd.DataFrame({'X': X})

    basic_model = pm.Model()
    with basic_model:
        sigma = 1
        mu = pm.Normal('mu', mu=0, sd=sigma)
        X = pm.Normal('X', mu=mu, sd=sigma, observed=data['X'])
    m = ProbabilisticPymc3Model(modelname, basic_model)
    if fit:
        m.fit(data)
    return data, m
Exemple #9
0
def create_pymc3_eight_schools_model(modelname='pymc3_eight_schools_model',
                                     fit=True):
    if fit:
        modelname = modelname + '_fitted'

    scores = np.array([28.39, 7.94, -2.75, 6.82, -0.64, 0.63, 18.01, 12.16])
    standard_errors = np.array([14.9, 10.2, 16.3, 11.0, 9.4, 11.4, 10.4, 17.6])
    data = pd.DataFrame({
        'test_scores': scores,
        'standard_errors': standard_errors
    })
    standard_errors = theano.shared(standard_errors)

    with pm.Model() as normal_normal_model:
        tau = pm.Uniform('tau', lower=0, upper=10)
        mu = pm.Uniform('mu', lower=0, upper=10)
        theta_1 = pm.Normal('theta_1', mu=mu, sd=tau)
        theta_2 = pm.Normal('theta_2', mu=mu, sd=tau)
        theta_3 = pm.Normal('theta_3', mu=mu, sd=tau)
        theta_4 = pm.Normal('theta_4', mu=mu, sd=tau)
        theta_5 = pm.Normal('theta_5', mu=mu, sd=tau)
        theta_6 = pm.Normal('theta_6', mu=mu, sd=tau)
        theta_7 = pm.Normal('theta_7', mu=mu, sd=tau)
        theta_8 = pm.Normal('theta_8', mu=mu, sd=tau)

        test_scores = pm.Normal('test_scores',
                                mu=[
                                    theta_1, theta_2, theta_3, theta_4,
                                    theta_5, theta_6, theta_7, theta_8
                                ],
                                sd=standard_errors,
                                observed=data['test_scores'])

    m = ProbabilisticPymc3Model(
        modelname,
        normal_normal_model,
        shared_vars={'standard_errors': standard_errors},
        fixed_data_length=True)
    if fit:
        m.fit(data)
    return data, m