Exemple #1
0
def get_city_data():
    """Load the Minnesota radon measurements joined with county-level data.

    Reads ``data/srrs2.dat`` and ``data/cty.dat`` from the ``pymc3.examples``
    package data, keeps only the rows whose ``state`` is ``'MN'`` and joins
    the two tables on a combined ``fips`` code.

    Returns
    -------
    pandas.DataFrame
        The inner-joined table with three added columns: ``fips``
        (``stfips * 1000 + cntyfips``), ``lradon`` (natural log of
        ``activity``, with zero readings replaced by 0.1 first) and
        ``group`` (consecutive integers starting at 0, one per distinct
        ``fips`` value).
    """
    data = pd.read_csv(pm.get_data_file('pymc3.examples', 'data/srrs2.dat'))
    cty_data = pd.read_csv(pm.get_data_file('pymc3.examples', 'data/cty.dat'))

    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the full table.
    data = data[data.state == 'MN'].copy()

    data['fips'] = data.stfips * 1000 + data.cntyfips
    cty_data['fips'] = cty_data.stfips * 1000 + cty_data.ctfips
    # Zero readings are replaced by 0.1 before taking the logarithm.
    data['lradon'] = np.log(np.where(data.activity == 0, .1, data.activity))
    data = data.merge(cty_data, 'inner', on='fips')

    # One row per distinct county code, numbered in order of first appearance.
    unique = data[['fips']].drop_duplicates()
    unique['group'] = np.arange(len(unique))
    # 'fips' stays a regular column because it is the merge key below.
    return data.merge(unique, 'inner', on='fips')
Exemple #2
0
def get_city_data():
    """Load the Minnesota radon measurements joined with county-level data.

    Reads ``data/srrs2.dat`` and ``data/cty.dat`` from the ``pymc3.examples``
    package data, keeps only the rows whose ``state`` is ``'MN'`` and joins
    the two tables on a combined ``fips`` code.

    Returns
    -------
    pandas.DataFrame
        The inner-joined table with three added columns: ``fips``
        (``stfips * 1000 + cntyfips``), ``lradon`` (natural log of
        ``activity``, with zero readings replaced by 0.1 first) and
        ``group`` (consecutive integers starting at 0, one per distinct
        ``fips`` value).
    """
    data = pd.read_csv(pm.get_data_file('pymc3.examples', 'data/srrs2.dat'))
    cty_data = pd.read_csv(pm.get_data_file('pymc3.examples', 'data/cty.dat'))

    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the full table.
    data = data[data.state == 'MN'].copy()

    data['fips'] = data.stfips * 1000 + data.cntyfips
    cty_data['fips'] = cty_data.stfips * 1000 + cty_data.ctfips
    # Zero readings are replaced by 0.1 before taking the logarithm.
    data['lradon'] = np.log(np.where(data.activity == 0, .1, data.activity))
    data = data.merge(cty_data, 'inner', on='fips')

    # One row per distinct county code, numbered in order of first appearance.
    unique = data[['fips']].drop_duplicates()
    unique['group'] = np.arange(len(unique))
    # 'fips' stays a regular column because it is the merge key below.
    return data.merge(unique, 'inner', on='fips')
Exemple #3
0
    def build_model(self):
        """Build a Bernoulli regression model on the wells data set.

        Loads ``data/wells.dat`` from the ``pymc3.examples`` package data,
        divides ``dist`` by 100 and ``educ`` by 4, mean-centers every column
        except the first and appends a constant column named ``'1'``.

        Returns
        -------
        The model object bound by ``with pm.Model() as model``. It holds a
        Normal ``effects`` vector with one entry per predictor column
        (constant included) and a Bernoulli variable ``s`` observed on
        ``data.switch``.
        """
        path = pm.get_data_file('pymc3.examples', 'data/wells.dat')
        data = pd.read_csv(path, delimiter=u' ', index_col=u'id',
                           dtype={u'switch': np.int8})
        data.dist /= 100
        data.educ /= 4

        # Predictors: every column after the first, centered on its mean.
        predictors = data[data.columns[1:]]
        predictors -= predictors.mean()
        # Constant column of ones, so the last effect acts as an intercept.
        predictors['1'] = 1

        design = np.array(predictors)
        observed_switch = np.array(data.switch)

        with pm.Model() as model:
            effects = pm.Normal('effects', mu=0, tau=100. ** -2,
                                shape=len(predictors.columns))
            # Success probability is the sigmoid of the linear predictor.
            p = pm.sigmoid(pm.dot(design, effects))
            pm.Bernoulli('s', p, observed=observed_switch)
        return model
Exemple #4
0
    def build_model(self):
        """Build a Bernoulli regression model on the wells data set.

        Loads ``data/wells.dat`` from the ``pymc3.examples`` package data,
        divides ``dist`` by 100 and ``educ`` by 4, mean-centers every column
        except the first and appends a constant column named ``'1'``.

        Returns
        -------
        The model object bound by ``with pm.Model() as model``. It holds a
        Normal ``effects`` vector with one entry per predictor column
        (constant included) and a Bernoulli variable ``s`` observed on
        ``data.switch``.
        """
        wells_path = pm.get_data_file('pymc3.examples', 'data/wells.dat')
        read_options = dict(delimiter=u' ',
                            index_col=u'id',
                            dtype={u'switch': np.int8})
        data = pd.read_csv(wells_path, **read_options)
        data.dist /= 100
        data.educ /= 4

        # Every column after the first is a predictor; center each on its mean.
        features = data[data.columns[1:]]
        features -= features.mean()
        # Constant column of ones, so the last effect acts as an intercept.
        features['1'] = 1

        feature_matrix = np.array(features)
        switch_obs = np.array(data.switch)
        n_effects = len(features.columns)

        with pm.Model() as model:
            effects = pm.Normal('effects', mu=0, tau=100.**-2, shape=n_effects)
            # Success probability is the sigmoid of the linear predictor.
            linear = tt.dot(feature_matrix, effects)
            p = tt.nnet.sigmoid(linear)
            pm.Bernoulli('s', p, observed=switch_obs)
        return model
Exemple #5
0
# <codecell>
import pandas as pd
from pylab import *

from pymc3 import StudentT, Model, NUTS, Normal, find_MAP, trace, get_data_file
from pymc3.distributions.timeseries import GaussianRandomWalk

# <markdowncell>

# Data
# ----

# <codecell>

# Load the pancreatitis table and keep only the listed countries. The explicit
# copy makes the column assignments below write to an independent frame
# instead of a slice of the full table.
data = pd.read_csv(get_data_file("pymc3.examples", "data/pancreatitis.csv"))
countries = ["CYP", "DNK", "ESP", "FIN", "GBR", "ISL"]
data = data[data.area.isin(countries)].copy()

# Midpoint of each age interval, and the value column multiplied by 1000.
age = data["age"] = np.array(data.age_start + data.age_end) / 2
rate = data["value"] = data.value * 1000
# Integer code per row plus the distinct area labels in order of first
# appearance. The `order` keyword is not passed: pandas ignored it and later
# removed it, so passing it raises TypeError on current releases.
group, countries = pd.factorize(data.area)


ncountries = len(countries)

# <codecell>

# One panel per country in a 2x3 grid, each titled with the country label.
for i, country in enumerate(countries):
    subplot(2, 3, i + 1)
    title(country)
Exemple #6
0
import pymc3 as pm
import pandas as pd
from numpy.ma import masked_values

# Load the test-score table, then replace missing entries with the
# sentinel value -999.
test_scores = pd.read_csv(
    pm.get_data_file('pymc3.examples', 'data/test_scores.csv'))
test_scores = test_scores.fillna(-999)

# One float array per variable: test score, gender, number of siblings,
# previous disability, age, mother with HS education or better, hearing loss
# identified by 3 months of age.
(score, male, siblings, disability, age, mother_hs, early_ident) = (
    test_scores[['score', 'male', 'siblings', 'prev_disab', 'age_test',
                 'mother_hs', 'early_ident']]
    .astype(float)
    .values
    .T
)

with pm.Model() as model:

    # Impute missing values
    # Each covariate below gets its own prior and is passed as a masked array
    # in which entries equal to the sentinel -999 are masked.
    # NOTE(review): presumably PyMC3 treats the masked entries as missing
    # values to be sampled -- confirm against the PyMC3 documentation.

    # Sibling counts: Poisson with an Exponential(1) prior on its parameter.
    sib_mean = pm.Exponential('sib_mean', 1)
    siblings_imp = pm.Poisson('siblings_imp', sib_mean,
                              observed=masked_values(siblings, value=-999))

    # Previous disability: Bernoulli with a Beta(1, 1) prior on its probability.
    p_disab = pm.Beta('p_disab', 1, 1)
    disability_imp = pm.Bernoulli(
        'disability_imp', p_disab, observed=masked_values(disability, value=-999))

    # Mother's education flag: Bernoulli with a Beta(1, 1) prior on its probability.
    p_mother = pm.Beta('p_mother', 1, 1)
    mother_imp = pm.Bernoulli('mother_imp', p_mother,
                              observed=masked_values(mother_hs, value=-999))
Exemple #7
0
# <codecell>
import pandas as pd
from pylab import *

from pymc3 import StudentT, Model, NUTS, Normal, find_MAP, trace, get_data_file
from pymc3.distributions.timeseries import GaussianRandomWalk

# <markdowncell>

# Data
# ----

# <codecell>

# Load the pancreatitis table and keep only the listed countries. The explicit
# copy makes the column assignments below write to an independent frame
# instead of a slice of the full table.
data = pd.read_csv(get_data_file('pymc3.examples', 'data/pancreatitis.csv'))
countries = ['CYP', 'DNK', 'ESP', 'FIN', 'GBR', 'ISL']
data = data[data.area.isin(countries)].copy()

# Midpoint of each age interval, and the value column multiplied by 1000.
age = data['age'] = np.array(data.age_start + data.age_end) / 2
rate = data['value'] = data.value * 1000
# Integer code per row plus the distinct area labels in order of first
# appearance. The `order` keyword is not passed: pandas ignored it and later
# removed it, so passing it raises TypeError on current releases.
group, countries = pd.factorize(data.area)


ncountries = len(countries)

# <codecell>

# One panel per country in a 2x3 grid, each titled with the country label.
for i, country in enumerate(countries):
    subplot(2, 3, i + 1)
    title(country)
Exemple #8
0
#https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/stochastic_volatility.py

import numpy as np
import matplotlib.pyplot as plt
import pymc3
import pymc3.distributions.timeseries as ts
import pandas as pd
import scipy

#fname = 'https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/data/SP500.csv'
#returns = pd.read_csv(fname)
#returns = pd.read_csv('SP500.csv')
#print(len(returns))

# Keep only the last n entries of the S&P 500 return series.
n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples',
                                            "data/SP500.csv"))[-n:]

plt.plot(returns)
plt.ylabel('daily returns in %')

# Stochastic-volatility model for the return series.
with pymc3.Model() as sp500_model:
    # Exponential priors; testval supplies the test value for each variable.
    nu = pymc3.Exponential('nu', 1. / 10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1. / .02, testval=.1)
    # Random walk with one element per observation. sigma**-2 is its second
    # positional argument (presumably a precision -- confirm against the
    # GaussianRandomWalk signature).
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    # Record exp(-2 * s) as a named deterministic quantity.
    volatility_process = pymc3.Deterministic('volatility_process',
                                             pymc3.exp(-2 * s))
    # Student-t variable observed on the returns, with lam = 1 / volatility_process.
    r = pymc3.StudentT('r', nu, lam=1 / volatility_process, observed=returns)

# Re-enter the context of the model defined above.
with sp500_model:
    # Progress message emitted inside the model context.
    print('optimizing...')
Exemple #9
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pymc3 import HalfCauchy, Model, Normal, get_data_file, sample
from pymc3.distributions.timeseries import GaussianRandomWalk

# Load the pancreatitis table and keep only the listed countries. The explicit
# copy makes the column assignments below write to an independent frame
# instead of a slice of the full table.
data = pd.read_csv(get_data_file('pymc3.examples', 'data/pancreatitis.csv'))
countries = ['CYP', 'DNK', 'ESP', 'FIN', 'GBR', 'ISL']
data = data[data.area.isin(countries)].copy()

# Midpoint of each age interval, and the value column multiplied by 1000.
age = data['age'] = np.array(data.age_start + data.age_end) / 2
rate = data['value'] = data.value * 1000
# Integer code per row plus the distinct area labels in order of first
# appearance. The `order` keyword is not passed: pandas ignored it and later
# removed it, so passing it raises TypeError on current releases.
group, countries = pd.factorize(data.area)


ncountries = len(countries)

# Plot value against age as points, one panel per country in a 2x3 grid.
# Every panel gets the same y-range: 0 up to the maximum of `rate`.
for i, country in enumerate(countries):
    plt.subplot(2, 3, i + 1)
    plt.title(country)
    # Rows belonging to the current country only.
    d = data[data.area == country]
    plt.plot(d.age, d.value, '.')

    plt.ylim(0, rate.max())


# Evenly spaced knot positions from the smallest age_start to the largest
# age_end in the data.
nknots = 10
knots = np.linspace(data.age_start.min(), data.age_end.max(), num=nknots)

Exemple #10
0
#https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/stochastic_volatility.py

import numpy as np
import matplotlib.pyplot as plt
import pymc3
import pymc3.distributions.timeseries as ts
import pandas as pd
import scipy

#fname = 'https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/data/SP500.csv'
#returns = pd.read_csv(fname)
#returns = pd.read_csv('SP500.csv')
#print(len(returns))

# Keep only the last n entries of the S&P 500 return series.
n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples', "data/SP500.csv"))[-n:]

plt.plot(returns)
plt.ylabel('daily returns in %')


# Stochastic-volatility model for the return series.
with pymc3.Model() as sp500_model:
    # Exponential priors; testval supplies the test value for each variable.
    nu = pymc3.Exponential('nu', 1./10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1./.02, testval=.1)
    # Random walk with one element per observation. sigma**-2 is its second
    # positional argument (presumably a precision -- confirm against the
    # GaussianRandomWalk signature).
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    # Record exp(-2*s) as a named deterministic quantity.
    volatility_process =  pymc3.Deterministic('volatility_process', pymc3.exp(-2*s))
    # Student-t variable observed on the returns, with lam = 1/volatility_process.
    r = pymc3.StudentT('r', nu, lam=1/volatility_process, observed=returns)
    
# Re-enter the context of the model defined above.
with sp500_model:
    # The parenthesized call prints the same text under Python 2 and is also
    # valid Python 3, where the bare print statement is a SyntaxError.
    print('optimizing...')