Example no. 1
def GetUserpro():
    employees = CSVFile.loadCSVfile1("./data/allusers/validusers.csv")
    userpro=[]
    for item in employees:
        proresult =[]
        user = item[0]
        state = item[1]
        testsequence = UserSequences.GettraintestdataV2(user)
        if testsequence==0:
            continue
        proresult.append(user)
        proresult.append(state)
        preProcess.GetTransiMatrixV2(user)
        H = HMM(user)
        for sequence in testsequence:
            result = H.hmmV2(sequence)
            proresult.append(result)

        userpro.append(proresult)

    # Daily probabilities over the user's 60 days from the improved-MM method; multiply them
    #filename= "./data/allusers/userpro.csv"
    # Daily probabilities over the user's 60 days from the MM method; add them
    filename = "./data/allusers/MM_userpro.csv"
    CSVFile.Writecsvtofile(filename,userpro)
    print(userpro)
Example no. 2
class RunViterbi(object):
    def __init__(self):
        self.maxSequence = []

    def trainHMM(self, filename):
        print("Reading training data from %s" % (filename))

        # Read in the training data from the file
        self.dataset = DataSet(filename)
        self.dataset.readFile(200, "train")

        # Instantiate and train the HMM
        self.hmm = HMM(self.dataset.numStates, self.dataset.numOutputs,
                       self.dataset.trainState, self.dataset.trainOutput)
        self.hmm.train()

        return

    def estMaxSequence(self, filename):

        print("Reading testing data from %s" % (filename))

        # Read in the testing data from the file
        self.dataset = DataSet(filename)
        self.dataset.readFile(200, "test")

        # Run Viterbi to estimate most likely sequence
        viterbi = Viterbi(self.hmm)
        self.maxSequence = viterbi.mostLikelySequence(self.dataset.testOutput)
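The Viterbi class used above comes from the surrounding project; for orientation, a minimal self-contained log-space Viterbi decoder in NumPy looks roughly like this (a sketch, not the project's implementation; it assumes integer observations and row-stochastic matrices):

import numpy as np

def viterbi_path(log_pi, log_A, log_B, obs):
    # log_pi: (K,) initial log-probs; log_A: (K, K) transition log-probs;
    # log_B: (K, V) emission log-probs; obs: list of integer observations.
    T, K = len(obs), len(log_pi)
    delta = np.zeros((T, K))            # best log-score ending in each state
    psi = np.zeros((T, K), dtype=int)   # argmax backpointers
    delta[0] = log_pi + log_B[:, obs[0]]
    for t in range(1, T):
        scores = delta[t - 1][:, None] + log_A   # scores[i, j]: state i -> j
        psi[t] = scores.argmax(axis=0)
        delta[t] = scores.max(axis=0) + log_B[:, obs[t]]
    path = [int(delta[-1].argmax())]
    for t in range(T - 1, 0, -1):
        path.append(int(psi[t][path[-1]]))
    return path[::-1]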
Example no. 3
 def test_inequality(self):
     """
     Compare two posterior distributions.
     The hidden state for the unobserved coin is less likely to be fair
     in the first case.
     """
     # define the dishonest casino model
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     M = np.array([[0.95, 0.05], [0.1, 0.9]])
     T = TransitionMatrix.MatrixTransitionObject(M)
     hidden_states = [fair_state, loaded_state]
     # create the hidden markov model object
     hmm_new = InternalModel(T, hidden_states)
     # define a sequence of observations
     observations_a = [1, 6, 6, None, 6, 2, 3, 4, 5, 1]
     observations_b = [1, 6, 6, 6, 6, 2, 3, 4, None, 1]
     # get posterior distributions
     distributions_a = hmm_new.posterior(
         hmm_new.get_dp_info(observations_a))
     distributions_b = hmm_new.posterior(
         hmm_new.get_dp_info(observations_b))
     # Compare the posterior probability that the die was fair
     # at each interesting position.
     p_fair_a = distributions_a[3][0]
     p_fair_b = distributions_b[-2][0]
     self.assertTrue(p_fair_a < p_fair_b)
     self.assertNotAlmostEqual(p_fair_a, p_fair_b)
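The posterior used in these tests is the standard forward-backward (smoothing) posterior. A minimal NumPy sketch for the dishonest casino, ignoring the missing-observation handling (the None entries) and assuming the loaded die shows a six with probability 0.5 and every other face with probability 0.1, with a uniform initial distribution:

import numpy as np

def scaled_posterior(pi, A, B, obs):
    # Forward-backward with per-step scaling; returns P(state | all observations).
    n, k = len(obs), len(pi)
    alpha = np.zeros((n, k))
    scale = np.zeros(n)
    alpha[0] = pi * B[:, obs[0]]
    scale[0] = alpha[0].sum()
    alpha[0] /= scale[0]
    for t in range(1, n):
        alpha[t] = (alpha[t - 1] @ A) * B[:, obs[t]]
        scale[t] = alpha[t].sum()
        alpha[t] /= scale[t]
    beta = np.ones((n, k))
    for t in range(n - 2, -1, -1):
        beta[t] = (A @ (B[:, obs[t + 1]] * beta[t + 1])) / scale[t + 1]
    gamma = alpha * beta
    return gamma / gamma.sum(axis=1, keepdims=True)

A = np.array([[0.95, 0.05], [0.1, 0.9]])          # same transition matrix as above
B = np.array([[1 / 6.0] * 6,                      # fair die
              [0.1, 0.1, 0.1, 0.1, 0.1, 0.5]])    # loaded die (assumed faces)
pi = np.array([0.5, 0.5])                         # assumed uniform start
rolls = [1, 6, 6, 6, 6, 2, 3, 4, 5, 1]
print(scaled_posterior(pi, A, B, [r - 1 for r in rolls]))  # column 0: P(fair)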
Example no. 4
 def test_external_file_model_compatibility(self):
     """
     Test StringIO streams for dynamic programming.
     """
     # define the dishonest casino model
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     M = np.array([[0.95, 0.05], [0.1, 0.9]])
     T = TransitionMatrix.MatrixTransitionObject(M)
     hidden_states = [fair_state, loaded_state]
     # define a sequence of observations
     observations = [1, 2, 6, 6, 1, 2, 3, 4, 5, 6]
     # define the observation stream
     o_converter = lineario.IntConverter()
     o_stream = lineario.SequentialStringIO(o_converter)
     o_stream.open_write()
     for x in observations:
         o_stream.write(x)
     o_stream.close()
     # create the reference hidden markov model object
     hmm_old = HMM.TrainedModel(M, hidden_states)
     # create the testing hidden markov model object
     names = ('tmp_f.tmp', 'tmp_s.tmp', 'tmp_b.tmp')
     hmm_new = ExternalModel(T, hidden_states, names)
     # get posterior distributions
     distributions_old = hmm_old.scaled_posterior_durbin(observations)
     hmm_new.init_dp(o_stream)
     distributions_new = list(hmm_new.posterior())
     # assert that the distributions are the same
     self.assertTrue(np.allclose(distributions_old, distributions_new))
Example no. 5
 def test_scaled_ntransitions_expected_inequality(self):
     """
     In the dishonest casino a run of sixes means not much expected switching.
     Maybe this test should be moved to the HMM module.
     """
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     states = [fair_state, loaded_state]
     prandom = 0.1
     # define the new hmm
     cache_size = 100
     transition_object = TransitionMatrix.UniformTransitionObject(
         prandom, len(states))
     hmm = Model(transition_object, [fair_state, loaded_state], cache_size)
     # define sequences of observations
     observations_a = [6, 6, 6, 6, 1, 2, 2, 4, 5, 4]
     observations_b = [6, 6, 6, 6, 1, 6, 6, 6, 5, 6]
     # define the (degenerate) distances between observations
     distances = [1] * (len(observations_a) - 1)
     # use the algorithm to get the expected number of transitions for each observation sequence
     e_a = hmm.scaled_ntransitions_expected(
         hmm.get_dp_info(observations_a, distances))
     e_b = hmm.scaled_ntransitions_expected(
         hmm.get_dp_info(observations_b, distances))
     # assert that we see what we expect
     self.assertTrue(e_a > e_b)
     self.assertNotAlmostEqual(e_a, e_b)
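For reference, the statistic under test is the posterior expected number of state switches, which is the off-diagonal mass of the Baum-Welch transition posteriors:

# With xi[t, i, j] = P(state_t = i, state_{t+1} = j | observations),
#     E[#switches] = sum over t of sum over i != j of xi[t, i, j].
# The long run of sixes in observations_b keeps the chain in the loaded
# state, so b is expected to switch less than a; hence e_a > e_b.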
Example no. 6
def main():
    word_list = []
    dictionary = set()
    syllable_dict = {}
    with open('data/Syllable_dictionary.txt') as f:
        for line in f:
            word_list.append(line.split()[0])
            dictionary.add(line.split()[0])
            syllable_dict[line.split()[0]] = line.split()[1:]

    data = load_data('data/shakespeare.txt', dictionary)
    data_rhyme = load_data_rhyme('data/shakespeare.txt', dictionary)

    data_HMM = encode_data_HMM(data, word_list)
    data_HMM_rhyme = encode_data_HMM(data_rhyme, word_list)

    rhyme_dict = generate_rhyme_pairs(load_data_rhyme('data/shakespeare_no_dumb_poems.txt', dictionary))

    '''
    model = HMM.unsupervised_HMM(data_HMM_rhyme, 20, 100)
    model.save("reversedHMM")
    '''
    modelRhyme = HMM.load_from_file("reversedHMM.npz")
    modelRegular = HMM.load_from_file("HMM20.npz")

    poems_from_various_models(word_list, syllable_dict)

    generate_rhyming_sonnet(modelRhyme, word_list, syllable_dict, rhyme_dict)

    generate_haiku(modelRegular, word_list, syllable_dict)
Example no. 7
File: hmm.py Project: en-san/HMM
def hmm(spike_datas, m, dt, seed, spikestart, spikeend, plotFlag=False):
    """
    Take parallel time-series data and estimate the hidden-state transitions.
    Here a hidden state lives in the state space of event rates for the time
    series (event generation is assumed to follow a Poisson model).
    Example: estimating hidden states from the spike trains of several neurons.
    Arguments:
        spike_datas: a list of lists, one per parallel spike-time series. This
            is slightly tedious to build, but the series have different lengths,
            so each one is stored in its own list.
            Example: data = np.loadtxt(...) -> data.tolist() -> spike_datas.append(data)
        m: number of states
        dt: time-bin width
        seed: random seed used to choose the initial values
        spikestart: time at which spiking starts
        spikeend: time at which spiking ends
        plotFlag: whether to plot the hidden-state transitions. True means plot.
    Returns:
        spike_hmm: the estimated hidden-state transitions
        means: the rate of each state for each time series
    """
    # t1 = time.time()
    total_step, spike_obs = HMM.get_obs(spikestart, spikeend, spike_datas, dt)

    # initial values
    pi, a, means = HMM.firstmodel(spike_datas, seed, dt, m, total_step)
    # EM estimation
    pi, a, means = HMM.baumwelch(pi, a, means, spike_obs)
    # Viterbi decoding
    spike_hmm = HMM.viterbirate(dt, pi, a, means, spike_obs)

    # set plotFlag=True to draw the figure
    if plotFlag:
        plot_rate(m, dt, spike_hmm)

    # t2 = time.time()

    return spike_hmm, means
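A hypothetical call site following the docstring's recipe; the file names and parameter values below are placeholders, not from the original project:

import numpy as np

spike_datas = []
for path in ["neuron1_spikes.txt", "neuron2_spikes.txt"]:   # placeholder files
    data = np.loadtxt(path)
    spike_datas.append(data.tolist())

spike_hmm, means = hmm(spike_datas, m=3, dt=0.01, seed=0,
                       spikestart=0.0, spikeend=10.0, plotFlag=False)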
Example no. 8
 def test_scaled_ntransitions_expected_compatibility(self):
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     states = [fair_state, loaded_state]
     prandom = 0.1
     # define the old hmm
     transition_matrix = TransitionMatrix.get_uniform_transition_matrix(
         prandom, len(states))
     old_hmm = HMM.TrainedModel(transition_matrix, states)
     # define the new hmm
     cache_size = 100
     transition_object = TransitionMatrix.UniformTransitionObject(
         prandom, len(states))
     new_hmm = Model(transition_object, [fair_state, loaded_state],
                     cache_size)
     # define a sequence of observations
     observations = [1, 2, 6, 6, 1, 2, 3, 4, 5, 6]
     # define the (degenerate) distances between observations
     distances = [1] * (len(observations) - 1)
     # use the old algorithm to get the expected number of transitions
     e_initial, A = old_hmm.scaled_transition_expectations_durbin(
         observations)
     ntransitions_expected_old = np.sum(A) - np.sum(np.diag(A))
     # use the new algorithm to get the expected number of transitions
     dp_info = new_hmm.get_dp_info(observations, distances)
     ntransitions_expected_new = new_hmm.scaled_ntransitions_expected(
         dp_info)
     # assert that the expected number of transitions are almost the same
     self.assertAlmostEqual(ntransitions_expected_old,
                            ntransitions_expected_new)
Example no. 9
 def __init__(self):
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     transition_matrix = np.array([[0.95, 0.05], [0.1, 0.9]])
     transition_object = TransitionMatrix.MatrixTransitionObject(
         transition_matrix)
     cache_size = 100
     Model.__init__(self, transition_object, [fair_state, loaded_state],
                    cache_size)
Example no. 11
def hmm_shakespeare_sonnet_goal1():
    # Load in everything
    sonnets, obs_map = sp.get_sonnets("data/shakespeare.txt", 3000)
    obs_map_r = {}
    for key in obs_map:
        obs_map_r[obs_map[key]] = key
    syl_map = sp.get_syllable_map("data/Syllable_dictionary.txt")
    # Train HMM
    model = HMM.unsupervised_HMM(sonnets, 5, 25)
    num_states = model.L
    # Print one blank line to make it pretty
    print("")
    # Generate sonnet
    last_state = np.random.choice(num_states, p=model.A_start)
    for n_lines in [4, 4, 2]:
        for l_no in range(n_lines):
            line = ""
            while (not line):
                curr_state = last_state
                l = ""
                no_syls = 0
                while (no_syls < 10):
                    w, curr_state = add_word(curr_state, model.A, model.O)
                    new_word = obs_map_r[w]
                    l += new_word
                    l += " "
                    no_syls += syl_map[new_word]
                if (count_syllables(l, syl_map) == 10):
                    line = l
            last_state = curr_state
            print(line.capitalize())
        print("")
Example no. 12
def main():
    # raw strings: '\U' in a normal string literal is a unicode escape
    folder = r'C:\Users\Chandramohan\Desktop\@IIT\POS Tagger\Data'
    data_file = folder + r'\Data.txt'
    inp_file = folder + r'\Train.txt'
    out_file = folder + r'\Test.txt'

    # Creating vocabulary
    data, tags = Utilities.Get_data(data_file)
    d_vocab, t_vocab = Utilities.Get_vocabulary(data, tags)

    # Get train data
    data, tags = Utilities.Get_data(inp_file)
    train_d, train_t = Utilities.Convert_data(data, tags, d_vocab, t_vocab)

    # Get test data
    data, tags = Utilities.Get_data(out_file)
    test_d, test_t = Utilities.Convert_data(data, tags, d_vocab, t_vocab)

    # Training
    hmm = HMM.HMM(train_d, train_t, d_vocab, t_vocab)
    p_t = []
    for i in range(len(test_d)):
        seq = hmm.Viterbi(test_d[i])
        p_t.append(seq)
        Utilities.Accuracy_item(seq, test_t[i])
    Utilities.Accuracy_model(p_t, test_t)
Example no. 13
def main(argv):
    # Call a function to construct the emission probabilities of hitting a key
    # given you tried to hit a (potentially) different key.
    b = ce.constructEmissions(pr_correct, adj)

    # Call a function to construct transmission probabilities and a prior distribution
    # from the King James Bible.
    (p, prior) = ct.constructTransitions('bible.txt')

    # Run the Viterbi algorithm on each word of the messages to determine the
    # most likely sequence of characters. (`input` here is the list of typed
    # messages, defined elsewhere in the script.)
    for t in range(0, len(input)):
        s_in = input[t].split()
        output = ""

        for i, word in enumerate(s_in):
            y = np.zeros(shape=(1, len(word)))

            for j in range(0, len(word)):
                y[0][j] = ord(word[j]) - ord('a')

            # perform the Viterbi algorithm
            x = hmm.HMM(p, prior, b, y[0])

            for j in range(0, len(x)):
                output += chr(int(x[j]) + ord('a'))

            if i != len(s_in) - 1:
                output += ' '

        print(input[t])
        print(output)
        print()
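A hand-traced step through the encoding, to make the index arithmetic concrete (hypothetical word):

# "teh" maps to y = [19, 4, 7] via ord(c) - ord('a'); if the Viterbi decode
# returns x = [19, 7, 4], chr(int(x[j]) + ord('a')) rebuilds it as "the".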
Example no. 14
def main():
    args = parse_args()

    if args.fasta_files:

        #load matrices
        tp = np.loadtxt(TRANSITION_MATRIX, delimiter=',')
        ep = pd.read_csv(EMISSION_MATRIX, index_col=0)
        with open(INITIAL_MATRIX, 'r') as json_file:
            ip = json.load(json_file)

        #initialize HMM
        cpgi_finder = HMM.HMM(transitions=tp,
                              emissions=ep,
                              initials=ip,
                              states=STATES)

        #predict islands
        results = []
        for fasta_file in args.fasta_files:

            try:
                seq_id, observation = SeqHandler.convert_seq(fasta_file)
            except ValueError as err:
                print('Incorrect file format:', err)
            else:
                result = cpgi_finder.viterbi(observation)
                results.append((seq_id, result))

        if results:
            SeqHandler.writeout(results)
Example no. 15
def poems_from_various_models(word_list, syllable_dict):
    models = [1, 3, 6, 10, 15, 20, 25]
    for i in models:
        print("### Model " + str(i) + " ####")
        model = HMM.load_from_file('HMM' + str(i) + '.npz')
        generate_sonnet(model, word_list, syllable_dict)
        print()
Example no. 16
def hmm_shakespeare_sonnet_naive():
    # Load in everything
    sonnets, obs_map = sp.get_sonnets("data/shakespeare.txt", 900)
    obs_map_r = {}
    for key in obs_map:
        obs_map_r[obs_map[key]] = key
    # syllable map is needed by get_n_syls in the couplet below
    syl_map = sp.get_syllable_map("data/Syllable_dictionary.txt")
    # Train HMM
    model = HMM.unsupervised_HMM(sonnets, 10, 10)
    # Print one blank line to make it pretty
    print("")
    # Generate quatrain 1
    for _ in range(4):
        line = ""
        while (not line):
            l = ""
            emission, states = model.generate_emission(7)
            for i in range(len(emission)):
                e = emission[i]
                w = obs_map_r[e]
                if (i == 0):
                    w = w.capitalize()
                l += w
                l += " "
                line = l
        print(line)
    print("")
    # Generate quatrain 2
    for _ in range(4):
        line = ""
        while (not line):
            l = ""
            emission, states = model.generate_emission(7)
            for i in range(len(emission)):
                e = emission[i]
                w = obs_map_r[e]
                if (i == 0):
                    w = w.capitalize()
                l += w
                l += " "
            line = l
        print(line)
    # Generate couplet
    for _ in range(2):
        line = ""
        while (not line):
            l = ""
            emission, states = model.generate_emission(7)
            for i in range(len(emission)):
                e = emission[i]
                w = obs_map_r[e]
                if (i == 0):
                    w = w.capitalize()
                l += w
                l += " "
            line = get_n_syls(10, l, syl_map)
        print(line)
Example no. 17
 def test_model_compatibility(self):
     # define the dishonest casino model
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     M = np.array([[0.95, 0.05], [0.1, 0.9]])
     T = TransitionMatrix.MatrixTransitionObject(M)
     hidden_states = [fair_state, loaded_state]
     # define a sequence of observations
     observations = [1, 2, 6, 6, 1, 2, 3, 4, 5, 6]
     # create the reference hidden markov model object
     hmm_old = HMM.TrainedModel(M, hidden_states)
     # create the testing hidden markov model object
     hmm_new = InternalModel(T, hidden_states)
     # get posterior distributions
     distributions_old = hmm_old.scaled_posterior_durbin(observations)
     distributions_new = hmm_new.posterior(
         hmm_new.get_dp_info(observations))
     # assert that the distributions are the same
     self.assertTrue(np.allclose(distributions_old, distributions_new))
Example no. 18
 def _init_classifiers(self):
     # Initialize classifier objects
     self.fenc = FreemanEncoder()
     self.knn = KNN.KNN()
     self.HMM = HMM.HMM()
     self.NaiveBayes = NaiveBayes.NaiveBayes()
     self.RandomForest = RandomForest.RandomForests()
     self.SVM = svm.SVM_SVC()
     self.LogisticReg = LogisticReg.LogisticReg()
     self.AdaBoost = adaboost.AdaBoost()
     self.GBRT = gbrt.GBRT()
     
     #Train initially on the default data set, if no model saved already
     # (each pickle.load below only probes for a saved model; its result is discarded)

     # Initialize KNN, no saved model for KNN
     self.knn.knn_train(CharRecognitionGUI_support.training_dataset, 1.0)

     # Initialize HMM
     self.HMM.training(CharRecognitionGUI_support.training_dataset)

     # Initialize Naive Bayes
     try:
         pickle.load(open("./Models/naivebayes_model.p", "rb"))
     except IOError:
         self.NaiveBayes.training(CharRecognitionGUI_support.training_dataset)

     # Initialize Random Forest
     try:
         pickle.load(open("./Models/random_forest.p", "rb"))
     except IOError:
         self.RandomForest.training(CharRecognitionGUI_support.training_dataset)

     # Initialize SVM
     try:
         pickle.load(open("./Models/svm.p", "rb"))
     except IOError:
         self.SVM.training(CharRecognitionGUI_support.training_dataset)

     # Initialize Logistic Regression
     try:
         pickle.load(open("./Models/logistic_model.p", "rb"))
     except IOError:
         self.LogisticReg.training(CharRecognitionGUI_support.training_dataset)

     # Initialize AdaBoost
     try:
         pickle.load(open("./Models/AdaBoostClassifier.p", "rb"))
     except IOError:
         self.AdaBoost.training(CharRecognitionGUI_support.training_dataset)

     # Initialize GBRT
     try:
         pickle.load(open("./Models/GradientBoostingClassifier.p", "rb"))
     except IOError:
         self.GBRT.training(CharRecognitionGUI_support.training_dataset)
Example no. 19
def segmentWord(oriSentence, useRules=True):
    sentence = preprocess(oriSentence)

    # This check matters: if the dictionary has already been initialized,
    # we don't have to do it again.
    if not isInitialized:
        initialize()
    DAG = getDAG(sentence)
    route = {}
    calculateRoute(DAG, route, sentence)
    words = []
    cur = 0
    buf = ""
    N = len(sentence)
    # cur is the current cursor position in the sentence
    while cur < N:
        next_cur = route[cur][1]
        word = sentence[cur:next_cur + 1]
        # A single-character word may be part of an out-of-vocabulary word,
        # so buffer it for now.
        if len(word) == 1:
            buf += word
        else:
            # flush the buffer once we reach a multi-character word
            if len(buf) != 0:
                # keep the buffer whole if it is in the dictionary (rare)
                # or is itself a single character
                if buf in Possibility or len(buf) == 1:
                    words.append(buf)
                else:
                    # otherwise it is an out-of-vocabulary span:
                    # use the HMM to segment it
                    words += HMM.cutUnrecognized(buf)
                # clear the buffer
                buf = ""
            words.append(word)
        cur = next_cur + 1
    if buf:
        words += HMM.cutUnrecognized(buf)
    return words
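A hypothetical usage; the exact segmentation depends on the loaded dictionary:

print(segmentWord("今天天气很好"))   # e.g. ['今天', '天气', '很', '好']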
Example no. 20
def HMMClassify(train, trainNE, test):
    """
    The function returns the tagging prediction by the HMM system as
    a dictionary.
    """
    model = HMM.HMM(train, trainNE)
    lines = preprocess.readFile(test)
    prediction = {'PER': [], 'LOC': [], 'ORG': [], 'MISC': []}
    lineNum = 1

    for line in lines:
        if (lineNum % 3) == 1:
            #Line with Tokens
            tags = model.assignTags(line)
        elif (lineNum % 3) == 0:
            #Line with indexes
            indexes = line.strip().split()
            preClass = None
            firstIdx = None
            lastIdx = None
            NEcontinues = False

            for i in range(len(tags)):
                tag = tags[i]
                if tag == 'O':
                    if NEcontinues:
                        #Previous tag ends
                        prediction[preClass].append(firstIdx + '-' + lastIdx)
                    preClass = None
                    firstIdx = None
                    lastIdx = None
                    NEcontinues = False

                else:
                    if NEcontinues:
                        if tag != preClass:
                            #Previous tag ends, new Tag begins
                            prediction[preClass].append(firstIdx + '-' +
                                                        lastIdx)
                            preClass = tag
                            firstIdx = indexes[i]
                            lastIdx = indexes[i]
                        else:
                            #Previous tag continues
                            lastIdx = indexes[i]
                    else:
                        #New tag begins
                        preClass = tag
                        firstIdx = indexes[i]
                        lastIdx = indexes[i]
                        NEcontinues = True
            if NEcontinues:
                #Entity that runs to the end of the line
                prediction[preClass].append(firstIdx + '-' + lastIdx)
        lineNum += 1
    return prediction
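A hand trace of the span-merging loop, with hypothetical tags and indexes:

#   tags    = ['O', 'PER', 'PER', 'O', 'LOC']
#   indexes = ['10', '11', '12', '13', '14']
# yields prediction['PER'] == ['11-12'] and prediction['LOC'] == ['14-14']:
# consecutive identical tags merge into one 'first-last' span, and a span
# closes on an 'O' tag, on a tag of a different class, or at end of line.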
Example no. 21
def auto_reply(msg):
    if msg.is_at:
        start = msg.text
        print(start)
        song = HMM.generate(start)
        print('writing a song...')
        print(type(song))
        print(song)
        # reply with the message content and type (Tuling bot)
        #answer = tuling.chat(msg.text)
        #return answer
        return song
Example no. 22
def get_score(test_file, output_file, mode='quick'):
    with open(test_file, "r") as f:
        with open(output_file, "w") as f2:
            for line in f:
                if mode == 'quick':
                    for word in cut_sentense(line):
                        f2.write(word + "  ")
                elif mode == 'HMM':
                    for word in HMM.cut_HMM(line):
                        f2.write(word + " ")
                elif mode == 'CRF':
                    for word in CRF.cut_CRF(line):
                        f2.write(word + " ")
Example no. 23
def Create_HMM_Predictions(yearly_df):
    # Create hidden Markov model predictions: estimate the transitions of the
    # ratio's direction, then predict based on the next direction.

    predictions = []
    num_years = yearly_df.shape[1] - 1
    for i in range(len(yearly_df)):
        years = yearly_df.values[i]
        # `np.nan in years` is unreliable for numpy arrays, so use np.isnan
        if np.any(np.isnan(years)):
            predictions.append(np.nan)
        else:
            try:
                direction_years = []
                # loop index renamed to j to avoid shadowing the row index i
                for j in range(1, num_years):
                    if years[j] > years[j - 1]:
                        direction_years.append(1)
                    else:
                        direction_years.append(0)
                params = HMM.Prep_Forward(direction_years)
                probs = HMM.forward(params, np.array(direction_years))
                recent_year = probs[0][-1]
                direction = np.argmax(recent_year)
                diff = abs(years[-3] - years[-2])
                if direction == 0:
                    prediction = years[-2] - diff
                else:
                    prediction = years[-2] + diff

                predictions.append(prediction)
            except Exception:
                predictions.append(np.nan)

    real = yearly_df.values[:, -1]
    pred = np.array(predictions)
    error = ((real - pred)**2) / len(real)
    # error=mean_squared_error(real,pred)
    yearly_df["HMM_error"] = error

    return yearly_df
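A worked numeric step through the prediction rule (hypothetical values):

# If the final forward distribution is [0.3, 0.7], argmax gives direction 1
# ("up"); with years[-3] = 1.2 and years[-2] = 1.5, diff = 0.3 and the
# prediction is 1.5 + 0.3 = 1.8. Direction 0 would predict 1.5 - 0.3 = 1.2.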
Example no. 24
def GetResult():
    k = 4
    UserSequences.Gettraintestdata(k)
    #employees = Employees.queryEmployees()
    employees = CSVFile.loadCSVfile1("./data/allusers/validusers.csv")
    #employees = CSVFile.loadCSVfile1("./data/allusers/Allusers_state.csv")
    resultlist = []
    avgresult = []

    for item in employees:
        user = item[0]
        state = item[1]
        preProcess.GetTransiMatrix(user)

        H = HMM(user)
        result = H.hmm(user)
        print(user, result)

        #resultlist.append([user,result])
        avgresultpro = average(result)
        avgresult.append([user, avgresultpro, state])

        print("average:", average(result), state)
        result.insert(0, user)
        resultlist.append(result)

    # k is an int, so convert it before building the file names
    resultfile = './data/allusers/' + str(k) + 'ResultPro974.csv'
    avgresultfile = './data/allusers/' + str(k) + 'AvgResultPro974.csv'
    CSVFile.Writecsvtofile(resultfile, resultlist)
    CSVFile.Writecsvtofile(avgresultfile, avgresult)
Example no. 25
def do_hmm(num_states=15, verbose=False, give_hmm=False):
    """
    verbose:  output the debugging stress pattern matrix
    give_hmm: if True, 
    """
    reversed_hmm = HMM.unsupervised_HMM(reversed_lines,
                                        n_states=num_states,
                                        N_iters=20,
                                        verbose=verbose)

    rhyming_words = preprocessor.get_rhyme_pairs(preprocessor.load_sonnets())
    rhyming_lines = []
    wanted_stress = [
        True, False, True, False, True, False, True, False, True, False
    ]
    for i in range(7):
        start1, start2 = "", ""
        while start1 not in token2index or start2 not in token2index:
            start1, start2 = random.choice(rhyming_words)
        start1 = token2index[start1]
        start2 = token2index[start2]
        line1 = reversed_hmm.generate_emission_syllables(
            10,
            syllable_dict,
            start1,
            stresses=stresses,
            desired_stresses=[x for x in wanted_stress])[0]
        line2 = reversed_hmm.generate_emission_syllables(
            10,
            syllable_dict,
            start2,
            stresses=stresses,
            desired_stresses=[x for x in wanted_stress])[0]
        if verbose:
            print(wanted_stress)
        rhyming_lines.append((" ".join([index2token[x] for x in line1[::-1]]),
                              " ".join([index2token[x] for x in line2[::-1]])))
    sonnet = "\n".join([
        upper_first(rhyming_lines[0][0]) + ",", rhyming_lines[1][0] + ".",
        upper_first(rhyming_lines[0][1]) + ",", rhyming_lines[1][1] + ".",
        upper_first(rhyming_lines[2][0]) + ",", rhyming_lines[3][0] + ".",
        upper_first(rhyming_lines[2][1]) + ",", rhyming_lines[3][1] + ".",
        upper_first(rhyming_lines[4][0]) + ",", rhyming_lines[5][0] + ".",
        upper_first(rhyming_lines[4][1]) + ",", rhyming_lines[5][1] + ".",
        upper_first(rhyming_lines[6][0]) + ",", rhyming_lines[6][1] + "."
    ])
    if give_hmm:
        return (reversed_hmm, sonnet)
    else:
        return sonnet
Example no. 26
def train(num_states, is_reversed=False):
    text = open('data/shakespeare.txt').read()
    obs, vocab, inv_vocab = preprocess.get_observations(text)

    if is_reversed:
        for ob in obs:
            ob.reverse()

    filename = f"models/hmm{num_states}" + ('_rev'
                                            if is_reversed else '') + ".txt"
    hmm = HMM.unsupervised_HMM(obs, num_states, 100)
    hmm.save(filename)

    return hmm
Example no. 27
def extract(domain):
    # alex = alexa(domain) # Alexa rank
    # seonum = seo(domain) # number of pages indexed by search engines
    suff = suffix(domain)  # whether the suffix is a mainstream TLD
    #num = number(domain) # count of digits in the domain
    length = len(domain)
    numratio = numberratio(domain)  # ratio of digits in the domain
    consnumber = consecutivenumber(domain)  # longest run of consecutive digits
    conschar = consecutivechar(domain)  # longest run of consecutive characters
    consamenum = consecutivesamechar(domain)  # longest run of the same letter
    mvdlen = mvd(domain)
    entr = entropy(domain)
    hmm = HMM.HMM(domain)

    return suff, length, numratio, consnumber, conschar, consamenum, mvdlen, entr, hmm
Example no. 28
 def partition(self):
     '''
     Segment the sentence shown in the display window
     '''
     sen = self.InputText.toPlainText()
     sen = sen.encode("utf8")
     res = HMM.partition(sen, self.InitiateProb, self.TransProbMatrix,
                         self.EmitProbMatrix)
     stri = ""
     for thing in res:
         stri += thing
         stri += "  "
     #stri=stri.decode("utf8")
     stri = stri[0:len(stri) - 1]
     self.OutputText.setText(stri)
     return None
Example no. 29
def hmm_shakespeare_sonnet_goal2():
    # Load in everything
    sonnets, obs_map = sp.get_sonnets("data/shakespeare.txt", 2000)
    obs_map_r = {}
    for key in obs_map:
        obs_map_r[obs_map[key]] = key
    syl_map = sp.get_syllable_map("data/Syllable_dictionary.txt")
    rhymes = sp.get_rhymes("data/shakespeare.txt", True)
    # Train HMM
    model = HMM.unsupervised_HMM(sonnets, 5, 25)
    num_states = model.L
    # Print one blank line to make it pretty
    print("")
    # Generate quatrains
    for _ in range(2):
        while (True):
            r = random.randint(0, len(rhymes) - 1)
            rhyme_pair_1 = rhymes[r]
            if (rhyme_pair_1[0] in obs_map and rhyme_pair_1[1] in obs_map):
                break
        while (True):
            r = random.randint(0, len(rhymes) - 1)
            rhyme_pair_2 = rhymes[r]
            if (rhyme_pair_2[0] in obs_map and rhyme_pair_2[1] in obs_map):
                break
        print(
            make_rhyme_line(model, obs_map[rhyme_pair_1[0]], syl_map,
                            obs_map_r))
        print(
            make_rhyme_line(model, obs_map[rhyme_pair_2[0]], syl_map,
                            obs_map_r))
        print(
            make_rhyme_line(model, obs_map[rhyme_pair_1[1]], syl_map,
                            obs_map_r))
        print(
            make_rhyme_line(model, obs_map[rhyme_pair_2[1]], syl_map,
                            obs_map_r))
        print("")
    # Generate couplet
    while (True):
        r = random.randint(0, len(rhymes) - 1)
        rhyme_pair = rhymes[r]
        if (rhyme_pair[0] in obs_map and rhyme_pair[1] in obs_map):
            break
    print(make_rhyme_line(model, obs_map[rhyme_pair[0]], syl_map, obs_map_r))
    print(make_rhyme_line(model, obs_map[rhyme_pair[1]], syl_map, obs_map_r))
    print("")
Example no. 30
 def test_scaled_posterior_durbin_compatibility(self):
     """
     Test the missing observation model when no observation is missing.
     """
     # define the models
     standard_hmm = HMM.DishonestCasino()
     missing_hmm = DishonestCasino()
     # define a sequence of observations
     observations = [1, 2, 6, 6, 1, 2, 3, 4, 5, 6]
     # define the (degenerate) distances between observations
     distances = [1]*(len(observations) - 1)
     # get posterior distributions with the standard algorithm
     standard_distributions = standard_hmm.scaled_posterior_durbin(observations)
     # get posterior distributions using the degenerate distances
     missing_distributions = missing_hmm.scaled_posterior_durbin(observations, distances)
     # assert that the distributions are the same
     self.assertTrue(np.allclose(standard_distributions, missing_distributions))
Example no. 31
from HMM import *
status=['a','b']
observation=['m','n']
trans_matrix=[[0.5,0.5],
              [0.5,0.5]]
initial_status=[0,1]
observation_probability_distribution=[[0.3,0.7],[0.3,0.7]]
a=HMM(status,observation,trans_matrix,initial_status,observation_probability_distribution)
print(a.forward_algorithm(['m','n']))
print(a.backward_algorithm(['m','n']))

print(a.viterbi_method1(['m','n'],1))
print(a.viterbi_method2(['m','n']))
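Both emission rows are identical, so the hidden state is irrelevant here and the likelihood factorizes; assuming forward_algorithm returns the sequence likelihood, a quick hand check:

# P('m', 'n') = 0.3 * 0.7 = 0.21 from either the forward or backward pass.
assert abs(a.forward_algorithm(['m', 'n']) - 0.21) < 1e-9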
Example no. 32
    print('\nTESTING todict()')
    # todict()
    fromtoken, totoken, dat = todict(tokens, tokenlist)
    test(set(fromtoken.keys()) == set(['0', '1', '\n', '']),
            "todict() fromtoken has correct keys",
            "todict() fromtoken has incorrect keys",
            numTests)
    test(all([totoken[fromtoken[i]] == i for i in fromtoken.keys()]) and
            all([fromtoken[totoken[i]] == i for i in totoken.keys()]),
            "todict() dicts match",
            "todict() dicts don\'t match",
            numTests)

    # Unsupervised prediction using Viterbi
    print("Deterministic viterbi prediction")
    testHMM = HMM(4, fromtoken, totoken, k=k)
    testHMM.learn(dat, tol=0.001)
    predseq = testHMM.predict(max_iters=30)
    print(testHMM.toktostr(predseq))
    # random viterbi
    print("Random Viterbi Test, should oscillate 0-1 (half the time this works)")
    for i in range(5):
        teststr = testHMM.predict(rand=True, max_iters=30)
        print(testHMM.toktostr(teststr))

    print('\nTESTING DUMMY2')
    # Unsupervised prediction on new dummy file, should favor 1212 oscillations
    s, l = parseFile('data/dummy2.txt')
    fromt, tot, ll = todict(s, l)
    dummy1 = HMM(4, fromt, tot, k=2)
    dummy1.learn(ll)
Example no. 33
#!/usr/bin/env python
# generate transition/emission matrices for the various datasets
# single shakespeare, few shakespeare, truncated shakespeare and full
# full runs too slowly.

from HMM import *
import pickle

f = open('sspeare10.pkl', 'wb')
print("\nSingle Shakespeare")
s, l = parseFile('data/singlespeare.txt')
fromt, tot, ll = todict(s, l)
print("Num tokens: " + str(len(fromt)))
sspeare = HMM(len(fromt), fromt, tot, k=10)
sspeare.learn(ll, tol=0.1, prt=True)
lstwords = sspeare.gettops(numWords=10)
for l in lstwords:
    print(l)
pickle.dump(sspeare, f, -1)
f.close()

f = open('small10.pkl', 'wb')
print("\nSmall Shakespeare")
s, l = parseFile('data/smallspeare.txt')
fromt, tot, ll = todict(s, l)
print("Num tokens: " + str(len(fromt)))
smallspeare = HMM(len(fromt), fromt, tot, k=10)
smallspeare.learn(ll, tol=0.03, prt=True)
lstwords = smallspeare.gettops(numWords=10)
for l in lstwords:
    print(l)
pickle.dump(smallspeare, f, -1)
f.close()
Example no. 34
from HMM import *
status=['a','b']
observation=['m','n']
trans_matrix=[[0.5,0.5],
              [0.5,0.5]]
initial_status=[0.5,0.5]
observation_probability_distribution=[[1,0],[0,1]]
a=HMM(status,observation,trans_matrix,initial_status,observation_probability_distribution)
print(a.forward_algorithm(['m','m']))
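A hand check of the result: state 'a' always emits 'm' and state 'b' always emits 'n', so observing ['m', 'm'] forces the path a -> a:

# P = pi_a * b_a(m) * a_aa * b_a(m) = 0.5 * 1 * 0.5 * 1 = 0.25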
Example no. 35
import sys, HMM, math

DEBUG = False

#===============================================
# Script
#===============================================

DEV_FILE_NAME = sys.argv[1]
TRANS_FILE_NAME = sys.argv[2]
EMIT_FILE_NAME = sys.argv[3]
PRIOR_FILE_NAME = sys.argv[4]

HMM = HMM.HiddenMarkovModel()

HMM.initHMM(TRANS_FILE_NAME, EMIT_FILE_NAME, PRIOR_FILE_NAME)

# for debug
if DEBUG:
	print('prior', HMM.hmmPrior)
	print('trans', HMM.hmmTrans)
	print('emit', HMM.hmmEmit)
	print('states', HMM.getStates())
	print('observables', HMM.getObservables())

# actual output
delim = ' '
with open(DEV_FILE_NAME) as FID:
	for line in FID:
		vObserved = line.strip().split(delim)
		print(math.log(HMM.backwardAlg(vObserved)))
print "complete!"
#-----------------------------------------------------------------------END - Load the training set

#-----------------------------------------------------------------------START - Load the testing set
sys.stdout.write("Loading testing set...")
testingSentences = getWSJDirectories(6, wsjLocation, 7)
print "complete!"
#-----------------------------------------------------------------------END - Load the testing set

#-----------------------------------------------------------------------START - Extract the lexicon and tags
sys.stdout.write("Extracting lexicon and tags...")
lexicon, tags = extractLexicon_and_Tags(trainingSentences)
print "complete!"
#-----------------------------------------------------------------------END - Extract the lexicon and tags

model = HMM(tags)

#-----------------------------------------------------------------------START - Train the model
sys.stdout.write("Training model...")
model.train(trainingSentences)
print "complete!"
#-----------------------------------------------------------------------END - Train the model

#-----------------------------------------------------------------------START - Evaluate the model on the training set
sys.stdout.write("Evaluating on training set...")
print "{0:.2f}% accuracy".format(model.evaluate(trainingSentences[:100])*100.0)
#-----------------------------------------------------------------------END - Evaluate the model on the training set

#-----------------------------------------------------------------------START - Evaluate the model on the testing set
sys.stdout.write("Evaluating on testing set...")
print "{0:.2f}% accuracy".format(model.evaluate(testingSentences[:100])*100.0)
Example no. 37
# place training data in list of sentence lists
trainingData = list() # list of lists
vocabulary = set() # vocabulary contains unique words
with open(TRAIN_FILE_NAME) as FID:
	for line in FID:
		data = line.strip().split(' ')
		trainingData.append( data )
		vocabulary.update(set(data))

# random prob assignment or use provided files
if len(sys.argv) == 5:
	# initialization files
	TRANS_FILE_NAME = sys.argv[2]
	EMIT_FILE_NAME = sys.argv[3]
	PRIOR_FILE_NAME = sys.argv[4]
	HMM.initHMM(TRANS_FILE_NAME, EMIT_FILE_NAME, PRIOR_FILE_NAME)
else:
	# init topology with standard files
	HMM.initHMMRand(STATES, vocabulary)

# for debug
if DEBUG:
	print('Before training')
	print('prior', HMM.hmmPrior)
	print('trans', HMM.hmmTrans)
	print('emit', HMM.hmmEmit)
	print('states', HMM.getStates())
	print('observables', HMM.getObservables())

avgLL = HMM.baumWelchAlg(trainingData, True)
Example no. 38
import HMM
import randomHmm as r
import avgll as a
import checker

hmm = HMM.getHMM()

data = [line.strip() for line in open("../data/trainnew.txt")][0]


def getXi(data, hmm):
    alpha = a.forward(data, hmm)
    beta = a.backward(data, hmm)
    likelihood = beta[0][0]
    xi = [[[[0, 0], [0, 0]], [[0, 0], [0, 0]]]]
    for t in range(len(data) - 1):
        xi.append([[[0, 0], [0, 0]], [[0, 0], [0, 0]]])
    data = "#" + data
    t = 0
    for h in range(len(data) - 1):
        for i in range(0, 2):
            for j in range(0, 2):
                ptrans = a.getTransitionProbability(i, j, hmm)
                pemit = a.getEmissionProbability(j, data[t + 1], hmm)
                temp = a.multiplyProbability(alpha[t][i], ptrans)
                temp = a.multiplyProbability(temp, pemit)
                temp = a.multiplyProbability(temp, beta[t + 1][j])
                xi[t][i][j] = a.divideProbability(temp, likelihood)
        t += 1
    return xi
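getXi implements the standard Baum-Welch transition posterior xi_t(i, j) = alpha_t(i) * a_ij * b_j(o_{t+1}) * beta_{t+1}(j) / P(O). An equivalent vectorized sketch in NumPy, assuming unscaled (T, K) alpha/beta arrays, a (K, K) transition matrix A, a (K, V) emission matrix B, and integer observations:

import numpy as np

def xi_matrix(alpha, beta, A, B, obs):
    # xi[t, i, j] = P(state_t = i, state_{t+1} = j | observations)
    T, K = alpha.shape
    likelihood = alpha[-1].sum()          # P(O) from the unscaled forward pass
    xi = np.empty((T - 1, K, K))
    for t in range(T - 1):
        xi[t] = (alpha[t][:, None] * A
                 * B[:, obs[t + 1]][None, :]
                 * beta[t + 1][None, :]) / likelihood
    return xi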