def fit_hmm_learn(seqs, n_states, axis):
    """
    Fit a GaussianHMM to a list of 2-D observation sequences and plot the
    Viterbi state assignment of every sample on `axis`.

    seqs : list of numpy arrays, each of shape (seq_len, 2)
    """
    samples = np.concatenate(seqs)
    lengths = np.array([len(s) for s in seqs])
    if len(samples) < n_states:
        # Not enough observations to estimate the requested number of states.
        return None

    hmm = GaussianHMM(n_components=n_states)
    hmm.fit(samples, lengths)

    ll = hmm.score(samples, lengths)
    _, labels = hmm.decode(samples, lengths)

    axis.set_title("HMM Learn (ll=%0.2f)" % ll)

    possible_colors = ['orange', 'blue', 'green', 'red']
    colors = np.array([possible_colors[e] for e in labels])

    # The plotting below assumes all sequences have the same length, so the
    # list stacks into a (n_seqs, seq_len, 2) array and the per-sample colors
    # can be re-aligned with the flattened scatter points.
    seq_arr = np.asarray(seqs)
    colors = colors.reshape(seq_arr.shape[0], seq_arr.shape[1])
    axis.scatter(seq_arr[:100, :, 0],
                 seq_arr[:100, :, 1],
                 color=colors[:100].ravel(),
                 marker='^')
    axis.scatter(seq_arr[100:200, :, 0],
                 seq_arr[100:200, :, 1],
                 color=colors[100:200].ravel(),
                 marker='o')
    axis.scatter(seq_arr[200:, :, 0],
                 seq_arr[200:, :, 1],
                 color=colors[200:].ravel(),
                 marker='s')
    return labels
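A minimal usage sketch for fit_hmm_learn (our addition, not part of the original listing): it assumes numpy, matplotlib and hmmlearn's GaussianHMM are available, and feeds the function 300 equal-length synthetic 2-D sequences.

import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

rng = np.random.default_rng(0)
seqs = [rng.normal(size=(20, 2)) for _ in range(300)]  # 300 sequences, 2 features each

fig, ax = plt.subplots()
labels = fit_hmm_learn(seqs, n_states=3, axis=ax)
plt.show()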
Example #2

def test_GaussHMM_decode(cases: str) -> None:
    np.random.seed(12346)
    cases = int(cases)
    N_decimal = 4
    max_iter = 100
    tol = 1e-3
    for _ in range(cases):
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        n_features = np.random.randint(4, 9)
        X = []
        lengths = []
        for _ in range(n_samples):
            seq_length = np.random.randint(4, 9)
            this_x = np.random.rand(seq_length, n_features)
            X.append(this_x)
            lengths.append(seq_length)

        hmm_gold = GaussianHMM(n_components=hidden_states,
                               covariance_type='full',
                               algorithm='viterbi',
                               n_iter=max_iter,
                               tol=tol)

        X_gold = np.concatenate(X)
        hmm_gold.fit(X_gold, lengths)

        gold_pi = hmm_gold.startprob_
        gold_n_features = hmm_gold.n_features
        gold_transmat = hmm_gold.transmat_
        gold_means = hmm_gold.means_
        gold_covars = hmm_gold.covars_

        hmm_mine = GaussHMM(hidden_states=hidden_states,
                            A=gold_transmat,
                            n_features=gold_n_features,
                            means=gold_means,
                            covar=gold_covars,
                            pi=gold_pi,
                            tol=tol,
                            max_iter=max_iter)
        gold_logprob, gold_state_seq = hmm_gold.decode(X_gold, lengths)

        # Decoding each sequence separately and summing the Viterbi
        # log-probabilities should match the multi-sequence decode above.
        mine_logprob_list = []
        mine_state_seq_list = []
        for this_x in X:
            this_logprob, this_state_seq = hmm_mine.decode(this_x)
            mine_logprob_list.append(this_logprob)
            mine_state_seq_list.append(this_state_seq)
        mine_logprob = sum(mine_logprob_list)
        mine_state_seq = np.concatenate(mine_state_seq_list)
        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_seq, gold_state_seq, decimal=N_decimal)

    print('Successfully tested the decode function in Gaussian HMM!')
Example #3
def fit_and_apply_hmm(normal, infected, chosen, data):
    # define sliding window size and number of components
    win, components = 4, 5
    # uncomment the next line to find the optimal window size and number of
    # components -- it takes some time though...
    # win, components = find_optimal_params(chosen)

    win_data = get_windows(chosen, win)

    # learn a Gaussian Hidden Markov Model with `components` states from the
    # infected host data
    hmm = GaussianHMM(n_components=components)
    hmm.fit(win_data)
    # store the log-likelihood of the host that trained the model
    modeled_log_likelihood = hmm.decode(win_data)[0]

    hosts_log_likelihood = {}

    # compute the log-likelihood of the flow sequence of every normal and
    # every infected IP with the same procedure (helper sketched below)
    for ip in list(normal) + list(infected):
        # get the flows of that host only
        host_data = data[(data['src_ip'] == ip) | (data['dst_ip'] == ip)]
        size = len(host_data) - win
        # if the host has enough flows for creating a window
        if size > 0:
            # create sliding-window sequences
            host_windows = get_windows(host_data, win)
            # get the log-likelihood of the sequential data
            hosts_log_likelihood[ip] = hmm.decode(host_windows)[0]
        else:
            hosts_log_likelihood[ip] = 0
    return hosts_log_likelihood, modeled_log_likelihood
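get_windows is not defined anywhere in this listing; the following is a minimal sketch (our assumption, not the original helper) of a sliding-window function consistent with how it is called above.

import numpy as np

def get_windows(data, win):
    # Stack `win` consecutive rows into one flattened feature vector per
    # window; assumes `data` has already been reduced to numeric columns.
    values = np.asarray(data, dtype=float)
    return np.array([values[i:i + win].ravel()
                     for i in range(len(values) - win + 1)])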
Example #4
def HHM_stock(stock, startdate, enddate, predict_startdate, predict_enddate,
              hmmcomponents=4, cov_type='full'):
    from hmmlearn.hmm import GMMHMM, GaussianHMM
    import datetime
    import numpy as np
    import pandas as pd
    import warnings

    def get_hmm_feature(stock, startdate, enddate):
        df = get_price(stock, start_date=startdate, end_date=enddate,
                       frequency='1d',
                       fields=['close', 'money', 'volume', 'high', 'low', 'open'],
                       skip_paused=True)
        close = df['close']
        high = df['high'][5:]
        low = df['low'][5:]
        volume = df['volume'][5:]
        opens = df['open'][5:]
        datelist = pd.to_datetime(close.index[5:])
        logreturn = (np.log(np.array(close[1:])) - np.log(np.array(close[:-1])))[4:]
        logreturn5 = np.log(np.array(close[5:])) - np.log(np.array(close[:-5]))
        rangereturn = np.log(np.array(high)) - np.log(np.array(low))
        closeidx = close[5:]
        money = df['money']
        money_ma5 = money.rolling(window=4).mean()
        money_ma5_rate = np.log(np.array(money[5:])) - np.log(np.array(money_ma5[4:-1]))
        return (closeidx, datelist,
                np.column_stack([logreturn, rangereturn, logreturn5, money_ma5_rate]))

    closeidx_fit, datelist_fit, data_fit = get_hmm_feature(stock, startdate, enddate)
    closeidx_pred, datelist_pred, data_predict = get_hmm_feature(stock, predict_startdate, predict_enddate)

    warnings.filterwarnings("ignore")  # diag
    hmm = GaussianHMM(n_components=hmmcomponents, covariance_type=cov_type,
                      n_iter=5000).fit(data_fit)
    # latent_states_sequence = hmm.predict(data_fit)

    hidden_state_meaning = hhm_state2read(hmm)
    readable_state_hidden = {meaning: state for state, meaning in hidden_state_meaning.items()}

    _, predict_states_sequence = hmm.decode(data_predict)
    predict_all_scores_sequence = hmm.predict_proba(data_predict)
    predict_states_score_sequence = [predict_all_scores_sequence[idx][s]
                                     for idx, s in enumerate(predict_states_sequence)]
    hhm_score = pd.DataFrame(predict_all_scores_sequence,
                             columns=[hidden_state_meaning[state]
                                      for state in range(hmm.n_components)],
                             index=datelist_pred).applymap(lambda x: round(x, 5))

    hhm_pred = pd.DataFrame({'close': closeidx_pred,
                             'state': predict_states_sequence,
                             'score': predict_states_score_sequence,
                             'action': [hidden_state_meaning[s] for s in predict_states_sequence]},
                            index=datelist_pred)
    # return pd.concat([hhm_pred, hhm_score], axis=1)
    return (hmm, hhm_pred)
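hhm_state2read is likewise not part of this listing; a plausible sketch (entirely our assumption) names each hidden state by the rank of its mean one-day log-return, so that states read as trading actions.

def hhm_state2read(hmm):
    # Hypothetical helper, not from the original listing: rank hidden states
    # by their mean log-return (feature 0) and map each to a readable label.
    names = ['strong_sell', 'sell', 'buy', 'strong_buy']
    order = hmm.means_[:, 0].argsort()
    return {int(state): names[min(rank, len(names) - 1)]
            for rank, state in enumerate(order)}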
Example #5
def find_optimal_params(chosen):
    max_ll = -math.inf
    optimal_win = 0
    optimal_components = 0
    for comp in range(2, 7):
        for win in range(2, 11):
            win_data = get_windows(chosen, win)
            hmm = GaussianHMM(n_components=comp)
            hmm.fit(win_data)
            log_likelihood = hmm.decode(win_data)[0]
            if log_likelihood > max_ll:
                max_ll = log_likelihood
                optimal_win = win
                optimal_components = comp
            # uncomment to see the log-likelihood for each configuration
            # print('Window=%d ,Components=%d, Log-likelihood=%.3f' % (win, comp, log_likelihood))
    print('Profiling: Optimal HMM values: Window=%d, Components=%d' % (optimal_win, optimal_components))
    return optimal_win, optimal_components
Example #6
class HMMAnomalyDetector(AnomalyDetector):
    def __init__(self, n_components=4, model=None):
        super().__init__(model=model, abbreviation='hmm')

        self.n_components = n_components

    def fit(self, traces, trace_lens):
        from hmmlearn.hmm import GaussianHMM
        self.model = GaussianHMM(n_components=self.n_components, covariance_type="diag", n_iter=100)
        self.model.fit(traces, trace_lens)

    def predict(self, traces, trace_lens):
        x = np.split(traces, np.cumsum(trace_lens)[:-1])

        log_probs = []
        for seq in x:
            log_probs.append(self.model.decode(seq)[0])

        return np.array(log_probs)
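A minimal usage sketch (our addition; it assumes the AnomalyDetector base class needs nothing beyond the model and abbreviation it is given):

import numpy as np

# 10 synthetic traces of length 5 with 3 features each, stacked row-wise
traces = np.random.rand(50, 3)
trace_lens = np.full(10, 5)

detector = HMMAnomalyDetector(n_components=4)
detector.fit(traces, trace_lens)
scores = detector.predict(traces, trace_lens)

# traces with unusually low Viterbi log-probability are candidate anomalies
anomalies = np.where(scores < np.percentile(scores, 5))[0]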
Example #7
    # print("Emission Matrix: ")
    # for s in hmm.states:
    #     print("Means")
    #     print(list(s.parameters())[0])

    #     print("Variance")
    #     print(1/list(s.parameters())[1])

    means = torch.stack([list(s.parameters())[0] for s in states])
    means = means.detach().numpy()
    precs = torch.stack([list(s.parameters())[1] for s in states])
    # precs = precs.detach().numpy()
    std = (1 / precs.sqrt()).detach().numpy()
    # print('std', std)

    y_pred, _ = hmm.decode(X)
    y_pred = y_pred.squeeze(1)
    # END FIT

    plt.subplot(len(datasets), 2, plot_num)
    if i_dataset == 0:
        plt.title('torchmm', size=18)

    colors = np.array(
        list(
            islice(
                cycle([
                    '#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628',
                    '#984ea3', '#999999', '#e41a1c', '#dede00'
                ]), int(max(y_pred) + 1))))
    # add black color for outliers (if any)
Example #8

def predict_states(X, group_id, empirical_states):
    # print("fitting to HMM and decoding ...")
    max_state_number = (group_id + 1) * 10
    n_components = 2

    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    # Train the HMM n times and keep the best run to avoid local minima
    max_score = 0
    max_proba_states = []
    transmat = [[]]
    n = 2
    for i in range(n):
        model.fit(X)
        score = model.decode(X)[0]
        if i == 0 or max_score < score:
            max_score = score
            # predict the optimal sequence of internal hidden states
            max_proba_states = model.predict(X)
            transmat = model.transmat_
    # end multiple training

    # Compare the decoded states with the empirical states; since HMM state
    # labels are arbitrary, also try the label-swapped sequence.
    max_proba_states = max_proba_states.tolist()
    max_proba_states_inver = [0 if s == 1 else 1 for s in max_proba_states]

    difference_state = np.subtract(np.array(max_proba_states),
                                   np.array(empirical_states)).tolist()
    difference_state_inver = np.subtract(np.array(max_proba_states_inver),
                                         np.array(empirical_states)).tolist()

    difference = np.sum(np.power(difference_state, 2))
    difference_inver = np.sum(np.power(difference_state_inver, 2))

    if difference_inver < difference:
        max_proba_states = max_proba_states_inver
    # end switch bits

    # Predict the future state: np.dot([0, 1], transmat) picks the transition
    # probabilities out of state 1.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 0
    if future_states_proba[1] > future_states_proba[0]:
        future_state = 1

    # pad with the predicted future state up to max_state_number entries
    result_states = max_proba_states + [
        future_state
        for _ in range(max_state_number - len(max_proba_states))
    ]
    return result_states
Example #9
def build_model(data, columns):
    features_train = np.float32(data[columns].to_numpy())
    discrete_features_train = sliding_window(features_train)
    model = GaussianHMM(n_components=3)
    model.fit(discrete_features_train)
    return (model, model.decode(discrete_features_train)[1])
Example #10
def runHmm(patient_record, date_list, group_id, empirical_states):
    ###########################################################################
    # Processing the data
    max_state_number = (group_id + 1) * 10

    X = np.zeros(shape=(max(len(patient_record), 2), 20))
    index = 0
    for date in date_list:
        tmp_list = []
        for key, value in patient_record[date].items():
            tmp_list.append(value)
        X[index] = np.array(tmp_list)
        index += 1

    # If no lab test is available, the pre-allocated all-zero rows (at least
    # two, by construction of X) are used as-is for training.

    ###########################################################################
    # Run Gaussian HMM
    print("fitting to HMM and decoding ...")
    n_components = 2

    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    # Train the HMM n times and keep the best run to avoid local minima
    max_score = 0
    max_proba_states = []
    transmat = [[]]
    n = 2
    for i in range(n):
        model.fit(X)
        score = model.decode(X)[0]
        if i == 0 or max_score < score:
            max_score = score
            # predict the optimal sequence of internal hidden states
            max_proba_states = model.predict(X)
            transmat = model.transmat_
    # end multiple training

    # Compare the decoded states with the empirical states; since HMM state
    # labels are arbitrary, also try the label-swapped sequence.
    max_proba_states = max_proba_states.tolist()
    max_proba_states_inver = [0 if s == 1 else 1 for s in max_proba_states]

    difference_state = np.subtract(np.array(max_proba_states),
                                   np.array(empirical_states)).tolist()
    difference_state_inver = np.subtract(np.array(max_proba_states_inver),
                                         np.array(empirical_states)).tolist()

    difference = np.sum(np.power(difference_state, 2))
    difference_inver = np.sum(np.power(difference_state_inver, 2))

    if difference_inver < difference:
        max_proba_states = max_proba_states_inver
    # end switch bits

    # Predict the future state: np.dot([0, 1], transmat) picks the transition
    # probabilities out of state 1.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 0
    if future_states_proba[1] > future_states_proba[0]:
        future_state = 1

    # pad with the predicted future state up to max_state_number entries
    result_states = max_proba_states + [
        future_state
        for _ in range(max_state_number - len(max_proba_states))
    ]

    print("done\n")
    return result_states
Example #13
# X1 = [[0.5], [1.0], [-1.0], [0.42], [0.24]]
# X2 = [[0.5], [1.0], [-1.0], [0.42], [0.24]]

# X = np.concatenate([X1, X2])
# lengths = [len(X1), len(X2)]

modelFor0 = GaussianHMM(n_components=2,
                        n_iter=100).fit(streamTrain0, lengthsTrain0)
# modelFor1 = GaussianHMM(n_components=16, n_iter=100).fit(streamTrain1, lengthsTrain1)
# modelFor0 = GaussianHMM(n_components=2, n_iter=200).fit(trainingDataSet0[0])
predictTraining = np.zeros(shape=(trainingDataSet.shape[0], 1))
results0 = np.zeros(shape=(trainingDataSet.shape[0], 1))
# results1 = np.zeros(shape=(trainingDataSet.shape[0], 1))
for i in range(0, trainingDataSet.shape[0]):
    predict0Results0 = modelFor0.decode(
        trainingDataSet[i].reshape(trainingDataSet[i].shape[0], 1),
        algorithm='viterbi')[0]
    # predict0Results1 = modelFor1.score(trainingDataSet[i].reshape(trainingDataSet[i].shape[0], 1))
    results0[i] = predict0Results0
    # if predict0Results0 > predict0Results1:
    #     results0[i] = 0
    # else:
    #     results0[i] = 1

    # print(predict0[63])

shapeTra = trainingDataSet.shape[0]
shapeTra0 = trainingDataSet0[0].shape[0]
shapeTra1 = trainingDataSet1[0].shape[0]

modelFor1 = GaussianHMM(n_components=2,
                        n_iter=100).fit(streamTrain1, lengthsTrain1)
Example #14
def MyGaussianHMM():
    from hmmlearn.hmm import GaussianHMM
    df = pd.read_csv(
        "/home/ray/Documents/suibe/2017/建模/Modeling_Preparation/dataset/SZIndex.csv",
        header=None)
    df.head()
    X = np.array(df.iloc[:, 0:5])

    # Part 1: model parameters unknown -- learn the model (problem 3)
    model = GaussianHMM(n_components=6, covariance_type="diag",
                        n_iter=1000)  # diagonal covariance matrices
    """
    covariance_type:
        "spherical" : one shared variance per state, features i.i.d.
                      (useful when there is too little data to estimate more)
        "diag"      : free diagonal entries, zeros elsewhere (the usual compromise)
        "full"      : all entries free (needs enough data for estimation)
    """
    model.fit(X)
    print("Hidden states:", model.predict(X))  # hidden state of each day
    print("Number of features: %s" % model.n_features)
    print("Number of hidden states: %s" % model.n_components)
    print("Start probabilities:", model.startprob_)
    print("Transition matrix:", model.transmat_)
    # Each hidden state emits features from a Gaussian, so the means form an
    # (n_components x n_features) matrix.
    print("Emission means:", model.means_)
    print("Emission covariances:", model.covars_)

    # Plotting
    hidden_states = model.predict(X)
    tradeDate = df.iloc[:, 5].values
    closeIndex = df.iloc[:, 6].values
    plt.figure(figsize=(15, 8))
    for i in range(model.n_components):
        idx = (hidden_states == i)
        plt.plot_date(pd.to_datetime(tradeDate[idx]),
                      closeIndex[idx],
                      '.',
                      label='%dth hidden state' % i,
                      lw=1)
        plt.legend()
        plt.grid(1)
    plt.show()

    # Part 2: model parameters known -- solve problems 1 and 2

    # Reuse the model learned above
    # Problem 1: likelihood of an observation
    print("Likelihood of the first observation: %s" % np.exp(model.score(X[:1])))
    # Problem 2: most likely hidden state sequence
    log_prob, state = model.decode(X[:10], algorithm="viterbi")
    print("Most likely hidden state sequence for the first ten days:", state)

    # Specify the model parameters by hand:
    # a 2-feature, 4-hidden-state example
    startprob = np.array([0.6, 0.3, 0.1, 0.0])
    # The transition matrix, note that there are no transitions possible
    # between component 1 and 3
    transmat = np.array([[0.7, 0.2, 0.0, 0.1], [0.3, 0.5, 0.2, 0.0],
                         [0.0, 0.3, 0.5, 0.2], [0.2, 0.0, 0.2, 0.6]])
    # The means of each component
    means = np.array([[0.0, 0.0], [0.0, 11.0], [9.0, 10.0], [11.0, -1.0]])
    # The covariance of each component
    covars = .5 * np.tile(np.identity(2), (4, 1, 1))
    model2 = GaussianHMM(n_components=4, covariance_type="full", n_iter=1000)
    model2.startprob_ = startprob
    model2.transmat_ = transmat
    model2.means_ = means
    model2.covars_ = covars
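    # Continuation sketch (our addition, not in the original listing): with
    # startprob_, transmat_, means_ and covars_ all specified by hand,
    # model2 can decode without any prior call to fit().
    obs = np.array([[0.1, 0.2], [0.3, 10.8], [9.2, 9.9], [11.3, -0.5]])
    log_prob, states = model2.decode(obs, algorithm="viterbi")
    print("Most likely hidden state sequence:", states)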