Example #1
import logging
import os
import pickle
from hashlib import md5
from time import ctime

from pystan import StanModel


def calibrate_noise_model(benchmarks, all_node_results, run_name=None,
                          model_filename="models/noise-with-outliers.stan",
                          iterations=9000, warmup=8000, chains=1):
    """
    Run the given noise model for the benchmark stars.
    """

    if run_name is None:
        run_name = "unnamed"

    else:
        # If a name has been given, append a hash of the current time
        # so the run name is unique.
        run_name = "-".join([run_name, md5(ctime().encode("utf-8")).hexdigest()])

    # Check for a compiled version of this model.
    basename, ext = os.path.splitext(model_filename)
    if os.path.exists(basename + ".pkl"):
        # There's a compiled version. Use that.
        model_filename = basename + ".pkl"
        logging.info("Using pre-compiled model {0}".format(model_filename))
        with open(model_filename, "rb") as fp:
            model = pickle.load(fp)

    else:
        # Compilation required.
        model = StanModel(model_filename)
        pickled_model_filename = basename + ".pkl"
        logging.info("Pickling compiled model to {0}".format(pickled_model_filename))
        with open(pickled_model_filename, "wb") as fp:
            pickle.dump(model, fp)

    data, node_names = build_data_dict(benchmarks, all_node_results)

    logging.info("Optimizing...")
    op = model.optimizing(data=data)
    logging.info("Optimized Values: \n{0}".format(op["par"]))

    logging.info("Fitting...")
    calibrated_model = model.sampling(data=data, pars=op["par"],
                                      iter=iterations, warmup=warmup, chains=chains)

    # Add the node names into the data dict.
    calibrated_model.data["node_names"] = node_names

    return calibrated_model
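
Pickled StanModel objects only unpickle reliably under the PyStan version that compiled them, so the cached .pkl used above can go stale after an upgrade. A minimal, defensive variant of the cache-load step (the try/except fallback and recompile are additions, not part of the original function):

# Sketch: fall back to recompiling when the cached pickle cannot be loaded
# (assumes the same basename/ext variables as in calibrate_noise_model).
try:
    with open(basename + ".pkl", "rb") as fp:
        model = pickle.load(fp)
except Exception:
    logging.warning("Stale or unreadable model cache; recompiling.")
    model = StanModel(basename + ext)
    with open(basename + ".pkl", "wb") as fp:
        pickle.dump(model, fp)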
Example #2
import numpy as np
from pystan import StanModel


def test_matrix_param_order_optimizing():
    model_code = """
    data {
    int<lower=2> K;
    }
    parameters {
    matrix[K,2] beta;
    }
    model {
    for (k in 1:K)
      beta[k,1] ~ normal(0,1);
    for (k in 1:K)
      beta[k,2] ~ normal(100,1);
    }"""
    sm = StanModel(model_code=model_code)
    op = sm.optimizing(data=dict(K=3))
    beta = op['par']['beta']
    assert beta.shape == (3, 2)
    beta_colmeans = np.mean(beta, axis=0)
    assert beta_colmeans[0] < 4
    assert beta_colmeans[1] > 100 - 4
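
The same ordering check can be made against MCMC draws: in PyStan 2, fit.extract() returns an array whose leading dimension indexes draws, followed by the parameter's own dimensions. A companion sketch (not part of the test file; assumes the model_code from the test above is in scope):

def test_matrix_param_order_sampling():
    sm = StanModel(model_code=model_code)
    fit = sm.sampling(data=dict(K=3), iter=2000, chains=2)
    beta_draws = fit.extract()['beta']  # shape: (n_draws, 3, 2)
    assert beta_draws.shape[1:] == (3, 2)
    assert np.mean(beta_draws[:, :, 1]) > 90  # second column centred near 100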
Example #3
with open("stan_logreg_2.txt") as f:  # file name inferred from the "2my" variant below
    stan_code = f.read()

stan_data_dict = {
    'N': data_wc.target.size,
    'p': 1,  #data_wc.data.shape[1],
    'X': data_wc.data[:, [0]],
    'y': data_wc.target
}

tmpHeader = "my-multiply.txt"
with open("tmp.hpp", "w") as f:
    with open(tmpHeader, "r") as f_cur:
        tmpHeaderTxt = f_cur.read()
    f.write(tmpHeaderTxt)

if False:  # toggle to also fit the native (non-customised) model
    bayes_logreg2_native = StanModel(model_code=stan_code)
    # stan_fit_native    = bayes_logreg2_native.sampling(data=stan_data_dict, iter=2000, chains=2);
    stan_optim2 = bayes_logreg2_native.optimizing(data=stan_data_dict,
                                                  iter=50000)

#print("optimum value of original (sq) NLL is {}".format(stan_optim2))
print("=================================")
print("Now onto the customised version...")
with open("stan_logreg_2my.txt") as f:
    stan_code_my = f.read()
bayes_logreg2_my = StanModel(model_code=stan_code_my,
                             allow_undefined=True,
                             includes=['tmp.hpp'],
                             include_dirs=["."])
stan_optim2_my = bayes_logreg2_my.optimizing(data=stan_data_dict, iter=50000)
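
The allow_undefined/includes mechanism works because tmp.hpp supplies a C++ definition for any function that the Stan program declares in its functions block but leaves undefined. The actual contents of my-multiply.txt are not shown here; as an illustration only, a definition matching a hypothetical declaration functions { real my_multiply(real a, real b); } would follow PyStan's external-C++ convention:

# Hypothetical header body (function name and body are assumptions; the
# template/pstream__ signature is what PyStan's generated C++ expects).
cpp_body = """
template <typename T0__, typename T1__>
typename boost::math::tools::promote_args<T0__, T1__>::type
my_multiply(const T0__& a, const T1__& b, std::ostream* pstream__) {
    return a * b;
}
"""
with open("my-multiply.txt", "w") as f:
    f.write(cpp_body)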
Example #4
# =============================================================================
# CAR with count features
# =============================================================================

# Compile the Stan model
car_model = StanModel(file=w_dir + '/code/kdd_modelling/Stan/poisson_sparse_CAR.stan')
# Define max_D for the CAR models
max_D = 1
# Get data + fit model
X_train, X_test, _, y_train, y_test, _, _, _, _, _ = data_pipeline(feature_engineering=True,
                                                                   feature_type='count')

stan_data = dict(y_train=y_train.astype(np.int64).squeeze(),
                 y_test=y_test.astype(np.int64).squeeze(),
                 p=X_train.shape[1] - 2,
                 N_train=X_train.shape[0],
                 N_test=X_test.shape[0],
                 X_train=X_train[:, 2:],
                 X_test=X_test[:, 2:],
                 loc_train=X_train[:, 0:2],
                 loc_test=X_test[:, 0:2],
                 max_D=max_D)

model_fit = car_model.optimizing(data=stan_data, iter=20000, init="0")
pred_oos = model_fit['pred_test']
pred_is = model_fit['pred_train']
rmse_oos = np.sqrt(np.mean((pred_oos - y_test.squeeze())**2))
lik_oos = np.sum(model_fit['log_lik_test'])
rmse_is = np.sqrt(np.mean((pred_is - y_train.squeeze())**2))
lik_is = np.sum(model_fit['log_lik'])
row = pd.Series({'RMSE_IS': rmse_is, 'LIK_IS': lik_is, 'RMSE_OOS': rmse_oos, 'LIK_OOS': lik_oos},
                name='CAR model (count)')
results_tbl = results_tbl.append(row)
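
The four metric assignments above repeat one pattern; a small helper (a sketch, not in the original script) makes the in-sample/out-of-sample symmetry explicit:

def fit_metrics(fit, y, pred_key, lik_key):
    # RMSE of the point predictions plus the summed pointwise log-likelihood.
    rmse = np.sqrt(np.mean((fit[pred_key] - y.squeeze()) ** 2))
    return rmse, np.sum(fit[lik_key])

# rmse_oos, lik_oos = fit_metrics(model_fit, y_test, 'pred_test', 'log_lik_test')
# rmse_is, lik_is = fit_metrics(model_fit, y_train, 'pred_train', 'log_lik')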

# =============================================================================
# CAR with dist features
# =============================================================================
X_train, X_test, _, y_train, y_test, _, _, _, _, _ = data_pipeline(feature_engineering=True,
                                                                   feature_type='dist')
Example #5
initBeta = [{'beta': np.arange(-3, 3, 0.2) * 0.05}]

tmpHeader = "my-dotprod.hpp"
with open("tmp.hpp", "w") as f:
    with open(tmpHeader, "r") as f_cur:
        tmpHeaderTxt = f_cur.read()
    f.write(tmpHeaderTxt)

if False:  # toggle to also fit the native (non-customised) model
    bayes_logreg3_native = StanModel(model_code=stan_code)
    # stan_fit_native    = bayes_logreg2_native.sampling(data=stan_data_dict, iter=2000, chains=2);
    stan_optim3 = bayes_logreg3_native.optimizing(data=stan_data_dict,
                                                  iter=50000,
                                                  init=initBeta)

if True:
    #print("optimum value of original (sq) NLL is {}".format(stan_optim2))
    print("=================================")
    print("Now onto the customised version...")
    with open("stan_logreg_3my.txt") as f:
        stan_code_my = f.read()
    if False:  # disabled: variant compiled with the external C++ includes
        bayes_logreg3_my = StanModel(model_code=stan_code_my,
                                     allow_undefined=True,
                                     includes=['tmp.hpp'],
                                     include_dirs=["."])

    bayes_logreg3_my = StanModel(model_code=stan_code_my)
Example #6
File: toy.py  Project: andycasey/ges
		increment_log_prob(log_sum_exp(
			log1m_alpha + normal_log(sp_vector[1], outlier_teff_mu, outlier_teff_sigma),
			log1m_alpha + normal_log(sp_vector[2], outlier_teff_mu, outlier_teff_sigma)
		));
    }
}"""

# Ok, here is our toy data:
with open("toy.data", "r") as fp:
	data = json.load(fp)

model = StanModel(model_code=model_code)

print("Optimizing...")
op = model.optimizing(data=data)

print("Fitting...")
fit = model.sampling(data=data, pars=op["par"], iter=20000)
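
If the intent of passing op["par"] is to start the chains at the MAP estimate (rather than to restrict which parameters are saved, which is what pars controls in PyStan 2), the optimum goes through init instead; a sketch of that variant:

# init takes one dict per chain; op["par"] maps parameter names to
# their optimized values.
fit = model.sampling(data=data, init=[op["par"]], chains=1, iter=20000)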

subplots_adjust = { "left": 0.10, "bottom": 0.05, "right": 0.95, "top": 0.95,
	"wspace": 0.20, "hspace": 0.45
	}

nodes = range(2)
dimensions = ("teff", "logg")

# Plot the m, b parameters for each node
dimensions_traced = []
for node in nodes:
	node_dimensions = \
Example #7
from typing import Dict, Tuple

from pystan import StanModel

# Distribution, FitObjectSampling, FitObjectOptimizing and STAN_TEMPLATE_PATH
# are defined elsewhere in the bqme package (see bqme.distributions).


class QM:
    """
    Base class for quantile matching (QM) models

    Parameters
    ----------
    parameters_dict : Dict[str, Distribution]
        Keys are internal names for the priors of the model,
        e.g. 'mu' and 'sigma' for a GaussianQM. Values are the
        user-defined Distributions. Note that a key must not be
        identical to its value's name.
    """
    def __init__(self, parameters_dict: Dict[str, Distribution]):
        self.parameters_dict = self._check_dict(parameters_dict)
        self.model = None

    def __str__(self):
        return self.__class__.__name__ + '(' +  \
            ', '.join([p.__str__() for p in self.parameters_dict.values()]) + \
            ')'

    def __repr__(self):
        return self.__str__()

    def _check_dict(self, parameters_dict: Dict[str, Distribution]):
        for key, value in parameters_dict.items():
            if not isinstance(value, Distribution):
                raise ValueError(
                    f'Input parameter "{key}" of "{self.__class__.__name__}" '
                    f'needs to be a Distribution (see bqme.distributions), '
                    f'but is of type {type(value)}.'
                )
        return parameters_dict

    def _template_replacements(self) -> Dict[str, str]:
        """
        Returns a dict whose keys are template variables and whose values
        are the strings substituted for them.
        Necessary keys: parametersnames, parameters, priors, cdf, lpdf, rng
        """
        distribution_name = self.__class__.__name__.replace("QM", "").lower()
        build = lambda s: '\n    '.join([
                p.code()[s] for p in self.parameters_dict.values()
            ])
        replacements = {
                'parametersnames'   : ', '.join([
                        p.name for p in self.parameters_dict.values()
                    ]),
                'parameters'        : build('parameter'),
                'priors'            : build('prior'),
                'cdf'               : f'{distribution_name}_cdf',
                'lpdf'              : f'{distribution_name}_lpdf',
                'rng'               : f'{distribution_name}_rng',
            }
        return replacements

    def _stan_code(self) -> str:
        with open(STAN_TEMPLATE_PATH) as f:
            code = f.read()
        for k, v in self._template_replacements().items():
            code = code.replace(f'${k}$', v)
        return code

    def _check_domain(self, X):
        minn, maxx = self.domain()
        outside = lambda x: not (minn < x < maxx)
        if len(list(filter(outside, X))) > 0:
            raise ValueError(
                f'some elements of X are not in the domain of the model, '
                f'which is ({minn}, {maxx}).'
            )

    def domain(self): pass

    @property
    def code(self) -> str:
        """ returns the final stan code """
        return self._stan_code()

    def compile(self):
        self.model = StanModel(model_code=self.code)

    def sampling(self, N: int, q: Tuple[float, ...], X: Tuple[float, ...]) -> 'FitObjectSampling':
        self._check_domain(X)
        if self.model is None: self.compile()
        data_dict = {'N':N, 'M':len(q), 'q':q, 'X':X}
        samples = self.model.sampling(data=data_dict)
        return FitObjectSampling(self, samples)

    def optimizing(self, N: int, q: Tuple[float, ...], X: Tuple[float, ...]) -> 'FitObjectOptimizing':
        self._check_domain(X)
        if self.model is None: self.compile()
        data_dict = {'N':N, 'M':len(q), 'q':q, 'X':X}
        opt = self.model.optimizing(data=data_dict)
        return FitObjectOptimizing(self, opt)
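
A minimal, hypothetical usage sketch of the base class: GaussianQM is only named in the docstring above, and the Normal and Gamma prior classes (with these signatures) are assumptions about bqme.distributions, so treat this as an illustration of the intended API rather than a definitive implementation:

class GaussianQM(QM):
    """Hypothetical concrete QM model over the whole real line."""
    def domain(self):
        return (float('-inf'), float('inf'))

model = GaussianQM({'mu': Normal(0, 1), 'sigma': Gamma(1, 1)})
print(model.code)  # the rendered Stan program
opt = model.optimizing(N=100, q=(0.25, 0.5, 0.75), X=(-0.67, 0.0, 0.67))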
Example #8
from pystan import StanModel


def test_optimizing_basic():
    sm = StanModel(model_code='parameters {real y;} model {y ~ normal(0,1);}')
    op = sm.optimizing()
    assert op['par']['y'].shape == ()
    assert abs(op['par']['y']) < 1

# Get data + fit model
X_train, X_test, _, y_train, y_test, _, _, _, _, _ = data_pipeline(
    feature_engineering=True, feature_type='count')

stan_data = dict(y_train=y_train.squeeze(),
                 y_test=y_test.squeeze(),
                 p=X_train.shape[1] - 2,
                 N_train=X_train.shape[0],
                 N_test=X_test.shape[0],
                 X_train=X_train[:, 2:],
                 X_test=X_test[:, 2:],
                 loc_train=X_train[:, 0:2],
                 loc_test=X_test[:, 0:2],
                 max_D=max_D)

model_fit = car_model.optimizing(data=stan_data, iter=20000, seed=1)
pred_oos = model_fit['pred_test']
pred_is = model_fit['pred_train']
rmse_oos = np.sqrt(np.mean((pred_oos - y_test.squeeze())**2))
lik_oos = np.sum(model_fit['log_lik_test'])
rmse_is = np.sqrt(np.mean((pred_is - y_train.squeeze())**2))
lik_is = np.sum(model_fit['log_lik'])
row = pd.Series(
    {
        'RMSE_IS': rmse_is,
        'LIK_IS': lik_is,
        'RMSE_OOS': rmse_oos,
        'LIK_OOS': lik_oos
    },
    name='CAR model (count)')
results = results.append(row)
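
DataFrame.append, used to accumulate rows here and in Example #4, was deprecated in pandas 1.4 and removed in 2.0; the equivalent with pd.concat keeps the Series name as the row index:

results = pd.concat([results, row.to_frame().T])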