import operator

# MarkovChain and the model-selection helpers (mt) are assumed to be imported
# from the surrounding library.
def order_est(X, r):
    """Estimate the Markov chain order of the trails in X via Bayesian
    model selection over the candidate orders 0..r."""
    max_model = r
    paths = X

    parameters = {}
    evidences = {}

    # Fit one Bayesian Markov chain per candidate order and store its evidence.
    for i in range(0, max_model + 1):
        markov = MarkovChain(k=i, use_prior=True, reset=True, modus="bayes")
        markov.prepare_data(paths)
        markov.fit(paths)

        evidences[i] = markov.bayesian_evidence()

        del markov

    model_probas = mt.bayesian_model_selection(evidences=evidences,
                                               params=parameters,
                                               penalty=False)

    # return the order with the highest model probability
    est_val = max(model_probas.items(), key=operator.itemgetter(1))[0]
    return est_val
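
# Minimal usage sketch with hypothetical toy trails; assumes numpy is
# available and the library imports above are in place.
if __name__ == "__main__":
    import numpy as np

    toy_trails = [np.array(["0", "1", "1", "0", "1"]),
                  np.array(["1", "0", "0", "1", "0"])]
    print(order_est(toy_trails, r=2))  # selected order among 0..2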
Example #2
    def test_evidence_random_row_reals(self):
        trails = []
        with open("../data/test_case_4") as f:
            for line in f:
                if line.strip() == "":
                    continue
                line = line.strip().split(" ")
                trails.append(np.array(line))

        # collect the observed states and build the vocabulary for matrix A
        states = set()
        for row in trails:
            states.update(row)

        vocab = {t: i for i, t in enumerate(states)}

        A = rand(5, 5, density=0.5, format='csr')

        ret1 = distr_chips_row(A, 5, mode="integers")

        markov = MarkovChain(use_prior=True, prior=1., specific_prior=ret1,
                             specific_prior_vocab=vocab, modus="bayes",
                             reset=False)
        markov.prepare_data(trails)
        markov.fit(trails)

        evi1 = markov.bayesian_evidence()

        ret2 = distr_chips_row(A, 5, mode="reals")

        markov = MarkovChain(use_prior=True, prior=1., specific_prior=ret2,
                             specific_prior_vocab=vocab, modus="bayes",
                             reset=False)
        markov.prepare_data(trails)
        markov.fit(trails)

        evi2 = markov.bayesian_evidence()

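        # integer vs. real chip allocation differs only by rounding, so the
        # two evidences should be close but need not match exactly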
        self.assertLess(abs(evi1 - evi2), 2)
Example #3
    def test_evidence_uniform_morestates(self):
        trails = []
        with open("../data/test_case_4") as f:
            for line in f:
                if line.strip() == "":
                    continue
                line = line.strip().split(" ")
                trails.append(np.array(line))

        # collect the observed states and build the vocabulary for matrix A
        states = set()
        for row in trails:
            states.update(row)

        vocab = {t: i for i, t in enumerate(states)}

        A = lil_matrix((6, 6))
        A[:] = 1.
        A = A.tocsr()

        ret1 = distr_chips(A, 36)

        markov = MarkovChain(use_prior=True, prior=1., specific_prior=ret1,
                             specific_prior_vocab=vocab, modus="bayes",
                             state_count=6, reset=False)
        markov.prepare_data(trails)
        markov.fit(trails)

        evi1 = markov.bayesian_evidence()


        markov = MarkovChain(use_prior=True, prior=2., modus="bayes", state_count=6, reset=False)
        markov.prepare_data(trails)
        markov.fit(trails)

        evi2 = markov.bayesian_evidence()


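        # distr_chips spreads the 36 chips uniformly over the 6x6 matrix, i.e.
        # one pseudo-count per cell on top of the base prior of 1.0, which
        # should match a flat prior of 2.0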
        self.assertEqual(evi1, evi2)
Example #4
    def test_evidence_hdf5(self):
        trails = []
        with open("../data/test_case_4") as f:
            for line in f:
                if line.strip() == "":
                    continue
                line = line.strip().split(" ")
                trails.append(np.array(line))

        # collect the observed states and build the vocabulary for matrix A
        states = set()
        for row in trails:
            states.update(row)

        vocab = {t: i for i, t in enumerate(states)}

        A = lil_matrix((5, 5))
        A[vocab["1"], vocab["2"]] = 0.6
        A[vocab["1"], vocab["1"]] = 0.4
        A[vocab["3"], vocab["5"]] = 0.4

        A = A.tocsr()

        hdf5_save(A, "test.hdf5")

        ret1 = distr_chips(A, 25)
        distr_chips_hdf5_sparse("test.hdf5", 25, A.sum(), "out.hdf5")

        h5 = tb.open_file("out.hdf5", 'r')

        ret2 = h5.root

        markov = MarkovChain(use_prior=True, reset=True, prior=1.,
                             specific_prior=ret1, specific_prior_vocab=vocab,
                             modus="bayes")
        markov.prepare_data(trails)
        markov.fit(trails)

        evi1 = markov.bayesian_evidence()

        markov = MarkovChain(use_prior=True, reset=True, prior=1.,
                             specific_prior=ret2, specific_prior_vocab=vocab,
                             modus="bayes")
        markov.prepare_data(trails)
        markov.fit(trails)

        evi2 = markov.bayesian_evidence()

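        # the in-memory and HDF5-based chip distributions should yield the
        # same prior and therefore identical evidences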
        self.assertEqual(evi1, evi2)

        h5.close()
        os.remove("test.hdf5")
        os.remove("out.hdf5")
Example #5
            continue
        line = line.strip().split(" ")
        print(len(line))
        paths.append(np.array(line))

max_model = 3

likelihoods = {}
parameters = {}
observations = {}
state_count_initial = {}

#this is for the MLE case
for i in range(0, max_model + 1):
    markov = MarkovChain(k=i, use_prior=False, reset=False, modus="mle")
    markov.prepare_data(paths)
    markov.fit(paths)

    l = markov.loglikelihood()
    likelihoods[i] = l
    parameters[i] = markov.parameter_count_
    observations[i] = markov.observation_count_
    state_count_initial[i] = markov.state_count_

    del markov

print(likelihoods)

# print some sample statistics: here a likelihood-ratio test between the
# nested model orders; the AIC can be derived from the same likelihoods and
# parameter counts (see the sketch below)
lrts, pvals, dfs = mt.likelihood_ratio_test(likelihoods, parameters)
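
# A minimal AIC sketch from the collected statistics, using AIC = 2k - 2*lnL
# with parameters[i] taken as the parameter count k of model order i:
aic = {i: 2 * parameters[i] - 2 * likelihoods[i] for i in likelihoods}
print(aic, min(aic, key=aic.get))  # the order with the lowest AIC is preferred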
Example #6
from scipy.sparse import csr_matrix

paths = []
with open("../data/test_case_3") as f:
    for line in f:
        if line.strip() == "":
            continue
        line = line.strip().split(" ")
        print(len(line))
        paths.append(np.array(line))

#this is without a specific prior
evidences = {}

markov = MarkovChain(use_prior=True, reset=True, modus="bayes")
markov.prepare_data(paths)
markov.fit(paths)

evidence = markov.bayesian_evidence()

print(evidence)

del markov

#this is with a very simple specific prior
evidences = {}
#we only have states 0 and 1
specific_prior = csr_matrix(np.array([[10, 3], [4, 1]]))
#need a vocab for assigning indices of the specific prior to a vocabulary
vocab = {"0": 0, "1": 1}
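
# The snippet is truncated here; fitting with this specific prior would follow
# the same pattern as above (a sketch mirroring the earlier examples):
markov = MarkovChain(use_prior=True, prior=1., specific_prior=specific_prior,
                     specific_prior_vocab=vocab, modus="bayes", reset=True)
markov.prepare_data(paths)
markov.fit(paths)
print(markov.bayesian_evidence())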
Example #7
        print(len(line))
        paths.append(np.array(line))


max_model = 3

likelihoods = {}
parameters = {}
observations = {}
state_count_initial = {}


#this is for the MLE case
for i in range(0, max_model + 1):
    markov = MarkovChain(k=i, use_prior=False, reset=False, modus="mle")
    markov.prepare_data(paths)
    markov.fit(paths)
    
    l = markov.loglikelihood()
    likelihoods[i] = l
    parameters[i] = markov.parameter_count_
    observations[i] = markov.observation_count_
    state_count_initial[i] = markov.state_count_
    
    del markov

print(likelihoods)


# print some sample statistics: a likelihood-ratio test, as in the previous example
lrts, pvals, dfs = mt.likelihood_ratio_test(likelihoods, parameters)