def test_evidence_hdf5(self):
    """Evidence from the in-memory chip prior must equal the HDF5-backed one."""
    # Load the trails: one whitespace-separated trail per non-empty line.
    trails = []
    with open("../data/test_case_4") as f:
        for line in f:
            if line.strip() == "":
                continue
            trails.append(np.array(line.strip().split(" ")))
    # Collect the distinct states seen in the trails.
    states = set()
    for trail in trails:
        for state in list(trail):
            states.add(state)
    # Build the vocabulary mapping each state label to a matrix index.
    vocab = dict((state, idx) for idx, state in enumerate(states))
    # Sparse prior matrix with a few hand-picked transition weights.
    A = lil_matrix((5, 5))
    A[vocab["1"], vocab["2"]] = 0.6
    A[vocab["1"], vocab["1"]] = 0.4
    A[vocab["3"], vocab["5"]] = 0.4
    A = A.tocsr()
    hdf5_save(A, "test.hdf5")
    # Distribute the 25 chips once in memory and once via the HDF5 code path.
    ret1 = distr_chips(A, 25)
    distr_chips_hdf5_sparse("test.hdf5", 25, A.sum(), "out.hdf5")
    h5 = tb.open_file("out.hdf5", 'r')
    ret2 = h5.root
    # Evidence with the in-memory prior.
    markov = MarkovChain(use_prior=True, reset=True, prior=1.,
                         specific_prior=ret1, specific_prior_vocab=vocab,
                         modus="bayes")
    markov.prepare_data(trails)
    markov.fit(trails)
    evi1 = markov.bayesian_evidence()
    # Evidence with the HDF5-backed prior.
    markov = MarkovChain(use_prior=True, reset=True, prior=1.,
                         specific_prior=ret2, specific_prior_vocab=vocab,
                         modus="bayes")
    markov.prepare_data(trails)
    markov.fit(trails)
    evi2 = markov.bayesian_evidence()
    self.assertEqual(evi1, evi2)
    # Clean up the temporary HDF5 files.
    h5.close()
    os.remove("test.hdf5")
    os.remove("out.hdf5")
def order_est(X, r):
    """Estimate the Markov chain order of the trails in *X*.

    Fits Bayesian MarkovChain models of order 0..r, computes the Bayesian
    evidence of each, and returns the order with the highest posterior
    model probability.

    Parameters
    ----------
    X : iterable of trails (state sequences) as accepted by MarkovChain
    r : int, maximum order to consider (inclusive)

    Returns
    -------
    int : the estimated order
    """
    # Bayesian evidence per candidate order.
    evidences = {}
    for k in range(r + 1):
        markov = MarkovChain(k=k, use_prior=True, reset=True, modus="bayes")
        markov.prepare_data(X)
        markov.fit(X)
        evidences[k] = markov.bayesian_evidence()
    # params was always passed empty here; it is only needed when penalty=True.
    model_probas = mt.bayesian_model_selection(evidences=evidences,
                                               params={}, penalty=False)
    # argmax over the model probabilities; max(d, key=d.get) is equivalent to
    # the previous Python-2-only max(d.iteritems(), key=itemgetter(1))[0]
    # and works on both Python 2 and 3.
    return max(model_probas, key=model_probas.get)
def test_evidence_single_row(self):
    """A full-matrix column prior and its single-row form must give equal evidence."""
    # Load the trails: one whitespace-separated trail per non-empty line.
    trails = []
    with open("../data/test_case_4") as f:
        for line in f:
            if line.strip() == "":
                continue
            trails.append(np.array(line.strip().split(" ")))
    # Collect the distinct states and assign each a matrix index.
    states = set()
    for trail in trails:
        for state in list(trail):
            states.add(state)
    vocab = dict((state, idx) for idx, state in enumerate(states))
    # 5x5 prior with all mass in column 0, 25 chips in total.
    A = lil_matrix((5, 5))
    A[:, 0] = 1.
    ret1 = distr_chips(A.tocsr(), 25)
    markov = MarkovChain(use_prior=True, reset=True, prior=1.,
                         specific_prior=ret1, specific_prior_vocab=vocab,
                         modus="bayes")
    markov.prepare_data(trails)
    markov.fit(trails)
    evi1 = markov.bayesian_evidence()
    # Single 1x5 row prior with proportionally fewer chips (5).
    A = lil_matrix((1, 5))
    A[0, 0] = 1.
    ret2 = distr_chips(A.tocsr(), 5)
    markov = MarkovChain(use_prior=True, reset=True, prior=1.,
                         specific_prior=ret2, specific_prior_vocab=vocab,
                         modus="bayes")
    markov.prepare_data(trails)
    markov.fit(trails)
    evi2 = markov.bayesian_evidence()
    self.assertEqual(evi1, evi2)
def test_evidence_random_row_reals(self):
    """Integer vs. real per-row chip distribution should give close evidence."""
    # Load the trails: one whitespace-separated trail per non-empty line.
    trails = []
    with open("../data/test_case_4") as f:
        for line in f:
            if not line.strip():
                continue
            trails.append(np.array(line.strip().split(" ")))
    # Collect the distinct states and assign each a matrix index.
    states = set()
    for trail in trails:
        for state in list(trail):
            states.add(state)
    vocab = {state: idx for idx, state in enumerate(states)}
    # Random sparse prior matrix; distribute 5 chips per row in both modes.
    A = rand(5, 5, density=0.5, format='csr')
    evidences = []
    for mode in ("integers", "reals"):
        prior = distr_chips_row(A, 5, mode=mode)
        markov = MarkovChain(use_prior=True, prior=1., specific_prior=prior,
                             specific_prior_vocab=vocab, modus="bayes",
                             reset=False)
        markov.prepare_data(trails)
        markov.fit(trails)
        evidences.append(markov.bayesian_evidence())
    # The two modes are not identical, but should be close.
    self.assertLess(abs(evidences[0] - evidences[1]), 2)
# NOTE(review): incomplete fragment — the enclosing file-reading loop
# (`with open(...)` / `for line in f:`) that this leading `continue` belongs
# to is missing from this chunk; left byte-identical because restructuring
# without the missing header is unsafe.
# What is visible: whitespace-split trails are appended to `paths`; MLE
# MarkovChain models of order 0..3 are fitted, collecting per-order
# log-likelihoods plus parameter/observation/state counts; finally a
# likelihood-ratio test is run over the fitted orders (Python 2 `print`).
continue line = line.strip().split(" ") print len(line) #print line paths.append(np.array(line)) max_model = 3 likelihoods = {} parameters = {} observations = {} state_count_initial = {} #this is for the MLE case for i in range(0, max_model + 1): markov = MarkovChain(k=i, use_prior=False, reset=False, modus="mle") markov.prepare_data(paths) markov.fit(paths) l = markov.loglikelihood() likelihoods[i] = l parameters[i] = markov.parameter_count_ observations[i] = markov.observation_count_ state_count_initial[i] = markov.state_count_ del markov print likelihoods #print some sample statistics (i.e., Akaike Information Criterion) lrts, pvals, dfs = mt.likelihood_ratio_test(likelihoods, parameters)
from scipy.sparse import csr_matrix

# Read the example paths: one whitespace-separated path per non-empty line.
paths = []
with open("../data/test_case_3") as f:
    for line in f:
        if line.strip() == "":
            continue
        tokens = line.strip().split(" ")
        print(len(tokens))
        paths.append(np.array(tokens))

# Bayesian evidence without a specific prior.
evidences = {}
markov = MarkovChain(use_prior=True, reset=True, modus="bayes")
markov.prepare_data(paths)
markov.fit(paths)
evidence = markov.bayesian_evidence()
print(evidence)
del markov

# Now with a very simple specific prior; we only have states "0" and "1".
evidences = {}
specific_prior = csr_matrix(np.array([[10, 3], [4, 1]]))
# Vocabulary assigning specific-prior matrix indices to the state labels.
vocab = {"0": 0, "1": 1}