def test_HDDMTruncated_distributions(self):
    """Check the distribution types HDDMTruncated attaches to node ``wfpt.0``.

    Random parameters and data for four subjects are generated, the model is
    sampled with ``self.iter`` iterations and ``self.burn`` burn-in, and the
    parents of the ``wfpt.0`` node are inspected for ``v``, ``a`` and ``t``.
    """
    params = hddm.generate.gen_rand_params()
    data, _ = hddm.generate.gen_rand_data(subjs=4, params=params)
    m = hddm.HDDMTruncated(data)
    m.sample(self.iter, burn=self.burn)

    # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` is the label-based
    # replacement for this lookup.
    wfpt = m.nodes_db.loc['wfpt.0']['node']

    # Drift rate v.
    v = wfpt.parents['v']
    assert isinstance(v, pm.Normal)
    assert isinstance(v.parents['mu'], pm.Normal)
    assert isinstance(v.parents['tau'], pm.Deterministic)
    assert isinstance(v.parents['tau'].parents['x'], pm.Uniform)

    # Parameter a.
    a = wfpt.parents['a']
    assert isinstance(a, pm.TruncatedNormal)
    assert isinstance(a.parents['mu'], pm.Uniform)
    assert isinstance(a.parents['tau'], pm.Deterministic)
    assert isinstance(a.parents['tau'].parents['x'], pm.Uniform)

    # Parameter t (its ``mu`` parent is intentionally not checked here,
    # matching the original set of assertions).
    t = wfpt.parents['t']
    assert isinstance(t, pm.TruncatedNormal)
    assert isinstance(t.parents['tau'], pm.Deterministic)
    assert isinstance(t.parents['tau'].parents['x'], pm.Uniform)
def test_HDDMTruncated_distributions(self):
    """Verify the distribution classes behind the ``wfpt.0`` node's parents.

    Builds an HDDMTruncated model on randomly generated data (4 subjects,
    ``size=10``), samples it, and asserts the type of each inspected parent
    of ``v``, ``a`` and ``t``.
    """
    params = hddm.generate.gen_rand_params()
    data, _ = hddm.generate.gen_rand_data(subjs=4, params=params, size=10)
    m = hddm.HDDMTruncated(data)
    m.sample(self.iter, burn=self.burn)

    def ancestor(*names):
        # Start from the wfpt.0 node and follow ``parents`` along ``names``.
        node = m.nodes_db.loc["wfpt.0"]["node"]
        for name in names:
            node = node.parents[name]
        return node

    # v
    self.assertIsInstance(ancestor("v"), pm.Normal)
    self.assertIsInstance(ancestor("v", "mu"), pm.Normal)
    self.assertIsInstance(ancestor("v", "tau"), pm.Deterministic)
    self.assertIsInstance(ancestor("v", "tau", "x"), pm.Uniform)

    # a
    self.assertIsInstance(ancestor("a"), pm.TruncatedNormal)
    self.assertIsInstance(ancestor("a", "mu"), pm.Uniform)
    self.assertIsInstance(ancestor("a", "tau"), pm.Deterministic)
    self.assertIsInstance(ancestor("a", "tau", "x"), pm.Uniform)

    # t
    self.assertIsInstance(ancestor("t"), pm.TruncatedNormal)
    self.assertIsInstance(ancestor("t", "tau"), pm.Deterministic)
    self.assertIsInstance(ancestor("t", "tau", "x"), pm.Uniform)
def check_outlier_model(seed=None, p_outlier=0.05):
    """Estimate data which contains outliers.

    Generates a data set from random parameters, appends artificially fast
    and slow outlier trials to it, and prints side-by-side parameter tables
    for several HDDMTruncated fits: MAP on the clean data, MAP on the
    contaminated data, MAP with ``p_outlier`` included as a parameter, MAP
    with a fixed ``p_outlier`` and a chi-square optimization.

    Args:
        seed: If not None, passed to ``np.random.seed`` before anything is
            generated.
        p_outlier: Number of outliers to add, expressed as a fraction of the
            number of clean trials.

    Returns:
        The data frame holding the clean trials followed by the outliers.
    """
    if seed is not None:
        np.random.seed(seed)

    def compare(reference, estimate, labels):
        # Two-row table of parameter values. The builtin ``float`` replaces
        # the ``np.float`` alias, which was removed in NumPy 1.24.
        return pd.DataFrame([reference, estimate], index=labels, dtype=float)

    # generate params and data
    params_true = hddm.generate.gen_rand_params(include=())
    data, _ = hddm.generate.gen_rand_data(size=500, params=params_true)
    data = pd.DataFrame(data)

    # generating outliers
    n_outliers = int(len(data) * p_outlier)
    n_fast = n_outliers // 2
    n_slow = n_outliers - n_fast
    outliers = data[:n_outliers].copy()
    abs_rt = abs(data['rt'])
    # Write through positional indexing on the frame itself; assigning into
    # ``outliers.rt[...]`` is chained assignment and may only modify a
    # temporary.
    rt_col = outliers.columns.get_loc('rt')
    # fast outliers
    outliers.iloc[:n_fast, rt_col] = (
        np.random.rand(n_fast) * (min(abs_rt) - 0.11) + 0.11)
    # slow outliers
    outliers.iloc[n_fast:, rt_col] = np.random.rand(n_slow) * 2 + max(abs_rt)
    outliers['response'] = np.random.randint(0, 2, n_outliers)
    print("generating %d outliers. %f of the dataset" %
          (n_outliers, float(n_outliers) / (n_outliers + len(data))))
    print("%d outliers are fast" % sum(outliers.rt < min(data.rt)))
    print("%d outliers are slow" % sum(outliers.rt > max(data.rt)))

    # Estimating the data without outliers. this is the best estimation we
    # could get from this data
    hm = hddm.HDDMTruncated(data)
    hm.map()
    index = ['true', 'estimated']
    best_estimate = hm.values
    # ``axis`` must be given by keyword: pandas 2.0 no longer accepts it
    # positionally in ``dropna``.
    df = compare(params_true, hm.values, index).dropna(axis=1)
    print("benchmark: MAP of clean data. \nThis is as good as we can get")
    print(df)

    # combine data with outliers
    data = pd.concat((data, outliers), ignore_index=True)

    # estimate the data with outlier, to confirm that it is worse
    hm = hddm.HDDMTruncated(data)
    hm.map()
    index = ['best_estimate', 'this_estimate']
    df = compare(best_estimate, hm.values, index).dropna(axis=1)
    print("MAP with outliers: This is as bas as we can get")
    print(df)

    # MAP with p_outlier as random variable
    hm = hddm.HDDMTruncated(data, include='p_outlier')
    hm.map()
    df = compare(best_estimate, hm.values, index)
    # A single ``.loc`` call replaces the removed ``.ix`` accessor and
    # guarantees the write reaches ``df``, so the column is not NaN in the
    # reference row when ``dropna`` runs below.
    df.loc['best_estimate', 'p_outlier'] = 0
    print("MAP with random p_outlier (Estimated from the data)")
    print(df.dropna(axis=1))

    # MAP with fixed p_outlier
    fixed_p_outlier = 0.1
    hm = hddm.HDDMTruncated(data, p_outlier=fixed_p_outlier)
    hm.map()
    df = compare(best_estimate, hm.values, index)
    print("MAP with fixed p_outlier (%.3f) " % fixed_p_outlier)
    print(df.dropna(axis=1))

    # Chi-square
    hm = hddm.HDDMTruncated(data)
    hm.optimize(method='chisquare')
    df = compare(best_estimate, hm.values, index).dropna(axis=1)
    print("Chisquare method")
    print(df)

    return data
def init_model(self, data):
    """Build an HDDMTruncated model for ``data`` and store it in ``self.model``.

    ``sz``, ``st`` and ``sv`` are passed as ``group_only_nodes``; every entry
    of ``self.init_kwargs`` is forwarded as a keyword argument.
    """
    group_only = ['sz', 'st', 'sv']
    model = hddm.HDDMTruncated(
        data,
        group_only_nodes=group_only,
        **self.init_kwargs
    )
    self.model = model
def init_model(self, data):
    """Build an HDDMTruncated model for ``data`` and store it in ``self.model``.

    Every entry of ``self.init_kwargs`` is forwarded to the constructor as a
    keyword argument.
    """
    model = hddm.HDDMTruncated(data, **self.init_kwargs)
    self.model = model