Ejemplo n.º 1
0
    def __init__(self, dictionary, correlation, verbose=False):
        """Store the configuration, then create and train the Markov classifier.

        Args:
            dictionary: Kept unchanged on ``self._dictionary``.
            correlation: Kept unchanged on ``self._correlation``.
            verbose: When true, ``self.__doc__`` is printed before training
                and ``print_transition_probabilities()`` is called after it.

        NOTE(review): presumably ``trainSimpleMarkovClassifier`` consumes the
        stored dictionary/correlation -- confirm against that method.
        """
        self._correlation = correlation
        self._dictionary = dictionary
        self._verbose = verbose

        if self._verbose:
            # Function-call form: the bare ``print x`` statement is a syntax
            # error on Python 3, while ``print(x)`` with a single argument
            # prints the same text on Python 2 as well.
            print(self.__doc__)

        # The classifier must exist before training is started below.
        self.mc = SimpleMarkovClassifier(dtype="unicode")

        self.trainSimpleMarkovClassifier()
        if self._verbose:
            self.print_transition_probabilities()
Ejemplo n.º 2
0
def testSimpleMarkovClassifier():
    """Train a character-level classifier on a sentence and check its output.

    Every word is fed letter by letter: the feature is the preceding
    character (a space before the first letter) and the label is the
    character that follows it (a space after the last letter). The test
    then checks that

    * the classifier reports an input dimension of 1,
    * the probabilities returned for every counted feature sum to 1,
    * the number of non-zero probabilities equals the number of distinct
      character pairs in the sentence, and
    * only a space, 'r', 't' or 'i' get a noticeable probability after 'e'.
    """
    tolerance = 1e-5
    as_array = mdp.numx.array

    classifier = SimpleMarkovClassifier(dtype="c")
    text = "after the letter e follows either space or the letters r t or i"
    words = text.split()

    for raw_word in words:
        token = raw_word.lower()

        # One 1-tuple per character: each sample has exactly one feature.
        previous_chars = [(ch,) for ch in " " + token]
        next_chars = list(token + " ")

        classifier.train(as_array(previous_chars), next_chars)

    assert classifier.input_dim == 1

    observed_transitions = 0
    # Snapshot of the items is kept as in the original; whether prob()
    # touches this table while we iterate is not visible here -- TODO confirm.
    for feature, count in list(classifier.features.items()):
        if not count:
            continue

        total = 0
        for distribution in classifier.prob(as_array([feature])):
            for _label, value in distribution.items():
                total += value
                if value:
                    observed_transitions += 1

        assert abs(total - 1.0) < tolerance

    # Expected number of transitions: pair the double-space-joined text with
    # a copy of itself shifted by one character, then drop the (' ', ' ')
    # pair, which is only an artefact of the two spaces between words.
    spaced = "  ".join(words)
    expected_pairs = set(zip(spaced + " ", " " + spaced))
    expected_pairs.discard((' ', ' '))
    assert observed_transitions == len(expected_pairs)

    allowed_after_e = [' ', 'r', 't', 'i']
    after_e = classifier.prob(as_array([['e']]))[0]
    total = 0
    for letter, probability in after_e.items():
        total += probability
        if not probability > tolerance:
            continue
        assert letter in allowed_after_e

    assert abs(total - 1.0) < tolerance