def predictions_rand(filename, company, dt1, dt2, num_of_states, test_num,
                     days_future):
    """Simulate final prices with an HMM started in a uniformly random state.

    A fitted model is loaded from ``filename``; its transition matrix, means
    and covariances are copied into a fresh ``GaussianHMM`` whose start
    distribution is uniform. For each of ``test_num`` runs a sequence of daily
    close-price differences is sampled and added to the last historical close.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker passed to ``quotes_historical_yahoo_ochl``.
        dt1: Start date of the historical window.
        dt2: End date of the historical window.
        num_of_states: Number of hidden states of the loaded model.
        test_num: Number of independent simulations to run.
        days_future: Prediction horizon in calendar days.

    Returns:
        A list with one simulated final price per run.
    """
    model = joblib.load(filename)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])
    last_close = close_v[-1]

    # Ratio of calendar days to trading days (251 open market days a year).
    # Forced to float so the value does not collapse to 1 under Python 2
    # integer division.
    total2active = 364.0 / 251
    # range() and sample() need an int; floor division by a float yields a
    # float, hence the explicit cast.
    days = int(days_future // total2active)
    print(days)

    # Every hidden state is equally likely to be the starting state.
    startprob = np.full(num_of_states, 1.0 / num_of_states)

    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # NumPy's legacy RandomState only accepts seeds in [0, 2**32 - 1].
    seed_limit = 2 ** 32

    random.seed()
    predictions = []  # Might be useful to store the predictions for future use
    for _ in range(test_num):
        rseed = random.randrange(0, seed_limit)
        sampled, _states = model_2_sample.sample(days, random_state=rseed)

        final_price = last_close
        for step in sampled[:days, 0]:
            # Skip moves that would push the price to zero or below.
            if final_price + step > 0:
                final_price += step

        predictions.append(final_price)

    return predictions
Beispiel #2
0
def hidden_markov_model(hidden_states_count, train, test, time, sample_size, data_name, f):
    """Fit a Gaussian HMM on ``train``, decode ``test``, plot and log the results.

    Args:
        hidden_states_count: Number of hidden states of the model.
        train: Observations used to fit the model.
        test: Observations decoded with the fitted model.
        time: Passed through to ``plot_time_series`` for the decoded series.
        sample_size: Number of observations to sample from the fitted model.
        data_name: Label used in plot titles and in the written report.
        f: Writable file-like object receiving the textual report.
    """
    hmm_model = GaussianHMM(algorithm='viterbi',
                            n_components=hidden_states_count,
                            covariance_type='diag',
                            n_iter=10000)
    hmm_model.fit(train)

    # Most likely hidden-state sequence for the test data.
    decoded = hmm_model.predict(test)

    # Transition-matrix row of the last decoded state.
    next_step_row = hmm_model.transmat_[decoded[-1], :]

    # Fresh sample: (observations, hidden states).
    sampled_obs, sampled_states = hmm_model.sample(sample_size)

    # Plots
    plot_time_series(test, decoded, hidden_states_count, time, data_name+' - Predict')
    points = get_points(hmm_model)
    plot_gaussians(train, points, hidden_states_count, data_name+' - Gaussian Predict')
    plot_time_series(sampled_obs, sampled_states, hidden_states_count, title=data_name+' - Sample')

    # Report
    f.write(''.join(['\n', data_name, '\n']))
    f.write(''.join(['Transition Matrix:\n', str(hmm_model.transmat_), '\n']))
    f.write(''.join(['\nNext Step ', str(next_step_row), '\n']))
    for entry in points:
        f.write(''.join([
            '\nHidden Variable NO° ', str(entry['no']),
            '\n\tMean: ', str(entry['mean']),
            '\n\tSigma: ', str(entry['sigma']),
            '\n',
        ]))
    f.write('\n#######################################\n')
Beispiel #3
0
def ma_er_ke_fu():
    """Fit Gaussian HMMs to one column of a CSV file and plot sampled data.

    Reads the data file, fits a 4-state model on column 2, prints the mean and
    variance of each hidden state, then refits with several larger state
    counts and plots a 1000-point sample for each.
    """
    warnings.filterwarnings("ignore")
    print('现在进行的算法是马尔科夫')
    # Load the data set. Raw string: the backslashes are path separators, not
    # escape sequences (the value is unchanged, but the non-raw form relies on
    # invalid escapes such as "\P" and "\d").
    data_path = r'D:\PycharmProjects\predict_system\马尔科夫\data_hmm.txt'
    df = pd.read_csv(data_path, header=None)
    print(df.info())  # Inspect the data to make sure it loaded correctly
    print(df.head())
    print(df.tail())

    # Plot the raw series
    df.iloc[:, 2].plot()

    # The first two columns are dates; column 2 is the data set. The model
    # needs a 2-D array, so reshape into a single column.
    dataset_X = df.iloc[:, 2].values.reshape(1, -1).T
    print(dataset_X.shape)  # (n_samples, 1); the original author noted 3312 samples

    # Build and train the HMM
    model = GaussianHMM(n_components=4, covariance_type="diag", n_iter=1000)
    model.fit(dataset_X)

    # Decode the hidden states
    hidden_states = model.predict(dataset_X)

    for i in range(model.n_components):  # Print every hidden state
        mean = model.means_[i][0]
        variance = np.diag(model.covars_[i])[0]
        print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format(
            (i + 1), mean, variance))

    # Generate data with the fitted HMM
    N = 1000
    samples, _ = model.sample(N)
    plt.plot(samples[:, 0])

    # Try to improve the model by varying n_components
    for i in [8, 12, 16, 18, 20]:
        model = GaussianHMM(n_components=i,
                            covariance_type="diag",
                            n_iter=1000)
        model.fit(dataset_X)
        samples, _ = model.sample(1000)
        plt.plot(samples[:, 0])
        plt.title('hidden state N={}'.format(i))
        plt.show()
Beispiel #4
0
def predict_one(filename, company, dt1, dt2, num_of_states, days_future, tr_prob):
    """Simulate one price path starting from the most likely current state.

    The hidden-state posterior of the last historical observation selects the
    start state; a sequence of close-price differences is then sampled and
    accumulated on top of the first real price.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker used for both the historical and the real prices.
        dt1: Start date of the historical window.
        dt2: End date of the historical window (start of the prediction).
        num_of_states: Number of hidden states of the loaded model.
        days_future: Horizon passed to ``getrealprice_series``.
        tr_prob: Unused here; kept for signature compatibility.

    Returns:
        1-D array with one predicted price per entry of the real-price
        series; element 0 equals the first real price.
    """
    model = joblib.load(filename)

    rp = getrealprice_series(company, dt2, days_future)
    days = rp.size
    if days == 0:
        # Nothing to predict against; avoid indexing into an empty series.
        return np.zeros(0)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])

    # The model works on day-to-day differences of the close value.
    X = np.column_stack([np.diff(close_v)])

    # Posterior over hidden states at the last observed day.
    lstate_prob = model.predict_proba(X)[-1]

    # If more than one state, make sure we start at the most likely current state
    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [1.]

    # Prepare the model for sampling
    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Make sure to randomize the samples
    random.seed()
    rseed = random.randrange(0, max_int_value)
    sampled_diffs, _states = model_2_sample.sample(days, random_state=rseed)

    # Day 0 is the current real price; day i adds sampled differences 1..i
    # (sample 0 is not used). The cumulative sum runs in the same order as a
    # step-by-step running total.
    path = np.concatenate(([rp[0]], sampled_diffs[1:days, 0]))
    predictions = np.zeros(days)
    predictions[:] = np.cumsum(path)

    return predictions
def hmm_gen_TS(N, hmm_n=4):
    """Generate ``N`` standardised time series by sampling refitted HMMs.

    For every output row a random row of the module-level ``data_all`` is
    picked, the HMM is refitted on it, ``T`` points are sampled and the
    sample is normalised to zero mean and unit standard deviation.

    Args:
        N: Number of series to generate.
        hmm_n: Number of hidden states of the Gaussian HMM.

    Returns:
        Array of shape ``(N, T)``.
    """
    generated = np.zeros([N, T])
    model = GaussianHMM(n_components=hmm_n, covariance_type="diag", n_iter=1000)
    for idx in range(N):
        if idx % 10 == 0:
            p('generating {0}th sample by HMM'.format(idx))
        row = np.random.randint(0, data_all.shape[0])
        observed = np.array(data_all.loc[row, '0':])
        model.fit(np.column_stack([observed]))
        sampled = model.sample(T)[0][:, 0]
        centred = sampled - np.mean(sampled)
        generated[idx, :] = centred / np.std(sampled)
    return generated
Beispiel #6
0
def bench_gaussian_hmm(size):
    """Time sampling from, fitting and decoding with a Gaussian HMM.

    Args:
        size: Number of observations to generate and then fit on.
    """
    banner = "benchmarking Gaussian HMM on a sample of size {0}".format(size)
    print(banner.center(36, " "))

    # Generator model with hand-set emission parameters.
    generator = GaussianHMM()
    generator.means_ = [[42], [24]]
    generator.covars_ = [[1], [1]]

    with timed_step("generating sample"):
        sample, _states = generator.sample(size)

    with timed_step("fitting"):
        fitted = GaussianHMM(n_components=2).fit([sample])

    with timed_step("estimating states"):
        fitted.predict(sample)
Beispiel #7
0
class HMMGoalModel(object):
    """Gaussian HMM over trajectories that remembers each trajectory's final state.

    A trajectory counts as a success when its decoded final hidden state is
    one of the final states seen in the training trajectories.
    """

    def __init__(self, per_data, per_lens=None, n_states=None):
        """Fit the HMM and record the final hidden state of every trajectory.

        Args:
            per_data: Array of trajectories; arrays with more than two
                dimensions are flattened to ``(-1, n_features)``.
            per_lens: Length of each trajectory. Defaults to
                ``len(trajectory)`` for every entry of ``per_data``.
            n_states: Number of hidden states. When ``None`` the model with
                the lowest ``aic`` among 2, 4, 6, 8 and 10 states is used.
        """
        if per_lens is None:
            per_lens = list(map(len, per_data))

        if len(per_data.shape) > 2:
            per_data = per_data.reshape(-1, per_data.shape[-1])

        if n_states is None:
            candidates = [GaussianHMM(n_components=c)
                          for c in (2, 4, 6, 8, 10)]
            # Explicit loop: a bare map() is lazy on Python 3 and would
            # leave the candidates unfitted.
            for candidate in candidates:
                candidate.fit(per_data, per_lens)
            # Lowest AIC wins; min() with a key never compares the model
            # objects themselves, even when scores tie.
            self.hmm = min(candidates,
                           key=lambda g: aic(g, per_data, per_lens))
        else:
            self.hmm = GaussianHMM(n_components=n_states)
            self.hmm.fit(per_data, per_lens)

        ll = self.hmm.score(per_data, per_lens)
        print("Goal HMM n_components %s Log likelihood %s"
              % (self.hmm.n_components, ll))

        # Index of the last observation of each trajectory in the
        # concatenated data.
        upper_idxs = np.cumsum(per_lens) - 1

        self.final_states = np.array(self.hmm.predict(per_data,
                                                      per_lens))[upper_idxs]
        print(self.final_states)
        self.T = int(np.mean(per_lens))
        self.n_components = self.hmm.n_components

    def is_success(self, per_trj):
        """Return whether the trajectory ends in one of the known final states."""
        per_trj = np.array(per_trj)
        states = self.hmm.predict(per_trj)
        final_state = states[-1]
        return final_state in self.final_states

    def sample(self, t=None):
        """Sample ``t`` steps from the HMM (defaults to the mean trajectory length)."""
        t = self.T if t is None else t
        return self.hmm.sample(t)
Beispiel #8
0
def fitHMM(Q, nSamples):
    """Load or train a 2-state Gaussian HMM for ``Q`` and summarise it.

    A pickled model is loaded from a fixed path when present; otherwise a
    model is fitted on ``Q`` and pickled to that path. The result is stored
    in the module-level ``model``.

    Args:
        Q: 1-D sequence of observations.
        nSamples: Number of samples to draw from the model.

    Returns:
        Tuple ``(hidden_states, mus, sigmas, P, Prob, samples)``: decoded
        states, state means, state standard deviations, transition matrix,
        log-likelihood of ``Q`` and the raw return value of ``model.sample``.
        The first four are reordered so the lower-mean state comes first.
    """
    global model

    pickle_path = '/home/khaled/saied_ws/src/khaled_model_hmm/src/train.pkl'
    observations = np.reshape(Q, [len(Q), 1])

    if os.path.isfile(pickle_path):
        # Reuse the previously trained model.
        with open(pickle_path, 'rb') as fid:
            model = cPickle.load(fid)
        print("loaded")
    else:
        print("training")
        # Fit a Gaussian HMM to Q and persist it for later runs.
        model = GaussianHMM(n_components=2, n_iter=1000).fit(observations)
        with open(pickle_path, 'wb') as fid:
            cPickle.dump(model, fid)

    # Classify each observation as state 0 or 1.
    hidden_states = model.predict(observations)

    # Parameters of the Gaussian HMM.
    mus = np.array(model.means_)
    variances = np.array([np.diag(model.covars_[k]) for k in (0, 1)])
    sigmas = np.sqrt(variances)
    P = np.array(model.transmat_)

    # Log-likelihood of the observations under the model.
    Prob = model.score(observations)

    # Draw nSamples from the model.
    samples = model.sample(nSamples)
    print(model.transmat_)
    print('score',Prob)
    print('hidden states',len(hidden_states))

    # Reorder so that the first row belongs to the lower mean.
    if mus[0] > mus[1]:
        mus = np.flipud(mus)
        sigmas = np.flipud(sigmas)
        P = np.fliplr(np.flipud(P))
        hidden_states = 1 - hidden_states

    return hidden_states, mus, sigmas, P, Prob, samples
Beispiel #9
0
def main():
    """Fit a Gaussian HMM on house prices and plot sampled vs. actual prices.

    Reads ``Price and rate and houses sold.csv``, trains a 15-state model on
    the percentage price change and the price, samples a sequence 12 points
    longer than the data and plots its price column against the real prices.
    """
    df = pd.read_csv('Price and rate and houses sold.csv', parse_dates=True)
    # NOTE(review): the original called ``df.drop(df.index[0])`` here and
    # discarded the result, so no row was ever dropped. The no-op is removed;
    # assign the result back if dropping the first row was intended.

    prices = np.array(df['price'])

    # Percentage change between consecutive prices; the first entry has no
    # predecessor and is set to 0 so the array lines up with ``prices``.
    diff_percentages = 100.0 * np.diff(prices) / prices[:-1]
    diff_percentages = np.append([0], diff_percentages)

    data = np.column_stack([diff_percentages, prices])

    # ``random_state=False`` in the original is treated as the integer seed
    # 0; spell it out.
    hmm = GaussianHMM(n_components=15,
                      covariance_type='tied',
                      n_iter=100000,
                      algorithm='viterbi',
                      random_state=0)

    hmm.fit(data)
    pred_count = 12

    num_samples = len(data)
    samples, _ = hmm.sample(num_samples + pred_count)
    print(samples)

    plt.figure()
    plt.xlabel('Days starting from Jan 1990')
    plt.ylabel('House prices predicted and actual')
    plt.title('Days vs Prices')

    # Red dashed: sampled price column; blue: actual prices.
    plt.plot(np.arange(num_samples + pred_count), samples[:, 1], 'r--',
             np.arange(num_samples), prices[:num_samples], 'b-')
    plt.ylim(ymin=0)
    plt.show()
Beispiel #10
0
def predictions_mls(filename, company, dt1, dt2,num_of_states,test_num, days_future, tr_prob):
    """Monte-Carlo price prediction starting from the most likely current state.

    Runs ``test_num`` simulations of the price path, aggregates them per day
    (mean, variance, median), compares the mean path with the real prices and
    writes the statistics to a CSV file under ``./sims_final``.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker used for the historical and the real prices.
        dt1: Start date of the historical window.
        dt2: End date of the historical window (start of the prediction).
        num_of_states: Number of hidden states of the loaded model.
        test_num: Number of simulated paths.
        days_future: Horizon passed to ``getrealprice_series``.
        tr_prob: Value written as a constant column next to the predictions.

    Returns:
        None. Results are printed and saved to disk.
    """
    model = joblib.load(filename)

    rp = getrealprice_series(company, dt2, days_future)
    days = rp.size

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])

    # The model works on day-to-day differences of the close value.
    X = np.column_stack([np.diff(close_v)])

    # Posterior over hidden states at the last observed day.
    lstate_prob = model.predict_proba(X)[-1]

    # If more than one state, make sure we start at the most likely current state
    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [1.]

    # Prepare the model for sampling
    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Make sure to randomize the samples
    random.seed()

    # One row per simulation, one column per day of the real-price series.
    allpredictions = np.zeros((test_num, days))
    for test in range(test_num):
        # Draw a fresh sequence for every run (the original drew one extra,
        # unused sequence after the last run).
        rseed = random.randrange(0, max_int_value)
        sampled_diffs, _states = model_2_sample.sample(days, random_state=rseed)

        # Day 0 is the current real price; day i adds sampled differences
        # 1..i (sample 0 is not used).
        final_price = rp[0]
        allpredictions[test][0] = final_price
        for i in range(1, days):
            final_price += sampled_diffs[i][0]
            allpredictions[test][i] = final_price

    predictions = allpredictions.mean(axis=0)
    predictions_var = allpredictions.var(axis=0)
    predictions_median = np.median(allpredictions, axis=0)

    errors = predictions - rp
    tr_prob_vector = np.full((predictions.size),tr_prob)

    data = [predictions,rp, errors, tr_prob_vector,
            predictions_var,predictions_median]

    err_final = errors[-1]

    print ("Start Price: ",rp[0],"Avg. Prediction: ",str(num_of_states),"states:" ,
           predictions[-1]," Real Price:", rp[-1])
    print (" Error end of predictions:", err_final,"Delta Start-End:", rp[0]-rp[-1],"\n")

    # np.savetxt does not create missing directories.
    out_dir = './sims_final'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    fname = "Predictions_"+str(company)+"_States_"+str(num_of_states)+"_stats.csv"
    fname = os.path.join(out_dir, fname)
    np.savetxt(fname, data, delimiter=",")

    return
Beispiel #11
0
# from matplotlib.finance import quotes_historical_yahoo_ochl
from hmmlearn.hmm import GaussianHMM

# Download historical INTC quotes, train a Gaussian HMM on the daily
# percentage change and the traded volume, then plot data generated by the
# trained model.
params = {'tickers': 'INTC', 'begin': '1994-04-05', 'end': '2015-07-03'}
r = requests.get('https://quantprice.herokuapp.com/api/v1.1/scoop/period',
                 params=params)
data = r.json()
# Keep the last six columns of every returned row.
quotes = np.array(data["datatable"]["data"])[:, -6:]

# Get quotes from Yahoo finance
# quotes = quotes_historical_yahoo_ochl("INTC",datetime.date(1994, 4, 5), datetime.date(2015, 7, 3))

# Extract the required values
dates = quotes[:, 0]
closing_values = quotes[:, 4]
# Drop the first volume so it lines up with the differenced closing values.
volume_of_shares = quotes[1:, 5]
# Take diff of closing values and computing rate of change
diff_percentage = 100.0 * np.diff(closing_values) / closing_values[:-1]
dates = dates[1:]
# Stack the percentage diff and volume values column-wise for training
X = np.column_stack([diff_percentage, volume_of_shares])
# Discard rows with a missing value.
X = [x for x in X if x[0] is not None and x[1] is not None]
# Create and train Gaussian HMM
print("\nTraining HMM....")
model = GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
model.fit(X)
# Generate data using model. sample() returns (observations, hidden states);
# the original unpacked them in the opposite order and plotted the state
# sequence instead of the generated data.
num_samples = 500
samples, _ = model.sample(num_samples)
plt.plot(np.arange(num_samples), samples[:, 0], c='black')
plt.show()
def predictions_mls(filename, company, refcompany, dt1, dt2, num_of_states,
                    test_num):
    """Simulate a year of prices with a two-series HMM from the likeliest state.

    The model is evaluated on the close-price differences of ``company`` and
    ``refcompany``. ``test_num`` paths are sampled; each path is spread over a
    ``yr``-column row in which a value is written three times at every fifth
    sampled day (except while fewer than two values have been written), so
    trading days stretch towards calendar days. The per-day mean is saved
    under ``./sims3``.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker whose price is predicted.
        refcompany: Ticker of the reference series.
        dt1: Start date of the historical window.
        dt2: End date of the historical window.
        num_of_states: Number of hidden states of the loaded model.
        test_num: Number of simulated paths.

    Returns:
        Three 1-D arrays (one value per simulation): the simulated prices at
        columns ``days_future - 2``, ``(days_future - 2) // 4`` and
        ``(days_future - 2) // 36`` of the prediction matrix.
    """
    days_future = 365

    model = joblib.load(filename)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])

    # Differencing shortens the series by one, so shift the close values too.
    diff = np.diff(close_v)
    close_v = close_v[1:]

    # Unpack quotes Company2
    quotes2 = quotes_historical_yahoo_ochl(refcompany, dt1, dt2)
    close_v2 = np.array([q[2] for q in quotes2])
    diff2 = np.diff(close_v2)

    # Left-pad the company series with zeros to the reference length.
    # NOTE(review): this assumes the reference series is at least as long as
    # the company series; otherwise column_stack below fails — confirm.
    delta = abs(diff2.shape[0] - diff.shape[0])
    diff0 = np.pad(diff, (delta, 0), mode='constant', constant_values=0)
    close_v = np.pad(close_v, (delta, 0), mode='constant', constant_values=0)

    X = np.column_stack([diff0, diff2])

    # Predict the most likely current internal hidden state
    hidden_probs = model.predict_proba(X)
    lstate_prob = hidden_probs[-1]

    days = int(days_future // total2active)  # 251 open market days in a year
    print(days, strftime("%Y-%m-%d %H:%M:%S", gmtime()))  #debugging purposes

    if num_of_states > 1:
        startprob = np.zeros(num_of_states)
        startprob[lstate_prob.argmax()] = 1.0
    else:
        startprob = [1.]

    model_2_sample = GaussianHMM(n_components=num_of_states,
                                 covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    random.seed()
    rseed = random.randrange(0, max_int_value)
    X, Z = model_2_sample.sample(days, random_state=rseed)

    allpredictions = np.zeros((test_num, yr))
    for test in range(test_num):
        final_price = close_v[-1]
        j = 0
        for i in range(days):
            # Skip moves that would push the price to zero or below.
            if ((final_price + X[i][0]) > 0):
                final_price += X[i][0]
            if (j > 1 and i % 5 == 0):
                # Every fifth sampled day fills three columns.
                allpredictions[test][j] = final_price
                allpredictions[test][j + 1] = final_price
                allpredictions[test][j + 2] = final_price
                j = j + 3
            else:
                allpredictions[test][j] = final_price
                j = j + 1

        # Fill the remaining columns with the last simulated price.
        while (j < allpredictions.shape[1]):
            allpredictions[test][j] = final_price
            j = j + 1

        rseed = random.randrange(0, max_int_value)
        X, Z = model_2_sample.sample(days, random_state=rseed)

    predictions_year = allpredictions.mean(axis=0)
    print("Avg. Prediction: ", predictions_year[-1])

    # np.savetxt does not create missing directories.
    out_dir = './sims3'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    fname = "Year_of_predictions_" + str(company) + "_States_" + str(
        num_of_states) + "_adv.csv"
    fname = os.path.join(out_dir, fname)
    np.savetxt(fname, predictions_year, delimiter=",")

    # Floor division keeps the column indices integral on Python 3 as well.
    last_col = days_future - 2
    return (allpredictions[:, last_col],
            allpredictions[:, last_col // 4],
            allpredictions[:, last_col // 36])
Beispiel #13
0
# Hand-specified 4-state Gaussian HMM with 2-D emissions: set the
# parameters, draw a short sample and plot it together with the state means.

# Initial state distribution.
startprob = np.array([0.6, 0.3, 0.1, 0.0])

# State transition matrix (row = current state, column = next state).
transmat = np.array([
    [0.7, 0.2, 0.0, 0.1],
    [0.3, 0.5, 0.2, 0.0],
    [0.0, 0.3, 0.5, 0.2],
    [0.2, 0.0, 0.2, 0.6],
])

# One 2-D mean per state.
means = np.array([
    [0.0, 0.0],
    [0.0, 11.0],
    [9.0, 10.0],
    [11.0, -1.0],
])

# The same 0.5 * identity covariance for each of the four states.
covars = np.tile(.5 * np.identity(2), (4, 1, 1))

model = GaussianHMM(n_components=4, covariance_type="full")
model.startprob_, model.transmat_ = startprob, transmat
model.means_, model.covars_ = means, covars

# Draw five observations and their hidden states.
X, state_sequence = model.sample(5)

plt.plot(
    X[:, 0], X[:, 1], ".-",
    label="observations", ms=6, mfc="orange", alpha=0.7,
)
# Label each state's mean with its 1-based component number.
for i, m in enumerate(means):
    plt.text(
        m[0], m[1], 'Component %i' % (i + 1),
        size=12, horizontalalignment='center',
        bbox=dict(alpha=.7, facecolor='w'),
    )
plt.legend(loc='best')
plt.show()
Beispiel #14
0
# Train a 5-state Gaussian HMM on the third column of data_1D.txt, report
# the per-state statistics and plot data generated by the trained model.

# Read the comma-separated input file
data = np.loadtxt('data_1D.txt', delimiter=',')

# Training data: the third column, as a single-column 2-D array
X = np.column_stack((data[:, 2],))

# Model definition
num_components = 5
hmm = GaussianHMM(
    n_components=num_components,
    covariance_type='diag',
    n_iter=1000,
)

# Fit the model
print('\nTraining the Hidden Markov Model...')
hmm.fit(X)

# Report mean and variance of the first feature for every hidden state
print('\nMeans and variances:')
for i in range(hmm.n_components):
    state_mean = hmm.means_[i][0]
    state_variance = np.diag(hmm.covars_[i])[0]
    print('\nHidden state', i + 1)
    print('Mean =', round(state_mean, 2))
    print('Variance =', round(state_variance, 2))

# Sample from the trained model and plot the first feature
num_samples = 1200
generated_data, _ = hmm.sample(num_samples)
plt.plot(np.arange(num_samples), generated_data[:, 0], c='black')
plt.title('Generated data')

plt.show()
Beispiel #15
0
from hmmlearn.hmm import GaussianHMM
from Slicing_time_series_data import read_data
# hmmlearn implements three HMM classes, split by whether the observations
# are continuous or discrete: GaussianHMM and GMMHMM model continuous
# observations, while MultinomialHMM models discrete observations (the model
# used in the original author's HMM theory series).
# Load the data
data = np.loadtxt('data_1D.txt', delimiter=',')

# Use the third column for training
x = np.column_stack([data[:, 2]])

# Create the GaussianHMM: n_components=5 hidden states; covariance_type='diag'
# uses a diagonal covariance matrix per state; n_iter is the maximum number
# of iterations to run.
num_components = 5
hmm = GaussianHMM(n_components=num_components,
                  covariance_type='diag',
                  n_iter=1000)

# Train the HMM
print("\n正在训练隐马尔科夫模型....")
hmm.fit(x)
# Print the mean and variance of every hidden state
print("\n均值和方差:")
for i in range(hmm.n_components):
    print('\n隐状态', i + 1)
    print('均值 = ', round(hmm.means_[i][0], 2))
    print("方差 = ", round(np.diag(hmm.covars_[i])[0], 2))

# Generate 1200 samples from the trained HMM and plot them
num_samples = 1200
generated_data, _ = hmm.sample(num_samples)  # "_" marks a value that is not used later
plt.plot(np.arange(num_samples), generated_data[:, 0], c='red')
# Title typo fixed ("Gnenerate data").
plt.title('Generated data')
plt.show()
# Configure the emission parameters of `model_gaussian` (created outside this
# excerpt), sample from it and plot the sampled points with the state means.

# One 2-D mean vector per hidden state (three states).
mean = np.array([[0.0, 0.0],
                 [0.0, 10.0],
                 [10.0, 0.0]])

# Setting the mean
model_gaussian.means_ = mean

# As emission probability is a 2-D gaussian distribution, thus
# covariance matrix for each state would be a 2-D matrix, thus
# overall the covariance matrix for all the states would be in the
# form of (n_components, 2, 2)
covariance = 0.5 * np.tile(np.identity(2), (3, 1, 1))
model_gaussian.covars_ = covariance

# model.sample returns both observations as well as hidden states
# the first return argument being the observation and the second
# being the hidden states
# Note: here Z receives the observations and X the hidden states.
Z, X = model_gaussian.sample(100)

# Plotting the observations
plt.plot(Z[:, 0], Z[:, 1], "-o", label="observations",
         ms=6, mfc="orange", alpha=0.7)

# Indicate the state numbers
# Each label is drawn at the state's mean, numbered from 1.
for i, m in enumerate(mean):
    plt.text(m[0], m[1], 'Component %i' % (i + 1),
             size=17, horizontalalignment='center',
             bbox=dict(alpha=.7, facecolor='w'))
plt.legend(loc='best')
plt.show()
Beispiel #17
0
    def hmm_calculate(self):
        """Fit a Gaussian HMM on the selected draws and show sampled predictions.

        Reads the training range, the check range, the iteration count and the
        number of predictions from the widget, trains the model on the
        training draws, samples ``predictCount`` rows and hands them, together
        with the check draws, to ``self.print_results``. Any exception raised
        while training or sampling is printed and shown in the text widget.
        """
        start = time.time()
        print("Начинаем считать в ", datetime.datetime.fromtimestamp(start).strftime("%d-%m-%y %H:%M:%S"))
        from_draw = self.widget.spinBoxFromDraw.value()
        to_draw = self.widget.spinBoxToDraw.value()

        check_from_draw = self.widget.spinBoxCheckFromDraw.value()
        check_to_draw = self.widget.spinBoxCheckToDraw.value()
        # Named n_iter so the builtin iter() is not shadowed.
        n_iter = self.widget.spinBoxIterations.value()

        # Too few training draws selected.
        if (to_draw - from_draw) <= 3:
            QMessageBox.warning(self, 'Предупреждение', "Обучающих примеров недостаточно", QMessageBox.Cancel )
            return

        predict_count = self.widget.spinBoxPredictCount.value()

        draws = self.db.get_draws_balls_numpy(from_draw, to_draw)

        # No training draws found.
        if draws.size == 0:
            QMessageBox.warning(self, 'Предупреждение', "Обучающих примеров нет", QMessageBox.Cancel )
            return

        check_draws = self.db.get_draws_balls_numpy(check_from_draw, check_to_draw)
        print("\n-checkDraws-\n")
        print(check_draws)
        print(check_draws.shape)
        # No check draws found.
        if check_draws.size == 0:
            QMessageBox.warning(self, 'Предупреждение', "Проверочных примеров нет", QMessageBox.Cancel )
            return

        # Create a Gaussian HMM. Use 7 hidden states, reduced when there are
        # fewer than two training draws per state, but never below one.
        num_components = 7
        half_count = len(draws) / 2
        if half_count < num_components:
            num_components = max(1, int(half_count - 1))
        print("num_components=",num_components)

        covar_type = str(self.widget.comboBoxCovarianceType.currentText())
        print("используем ",covar_type)
        # n_components sets the number of hidden states. Reasonably good
        # models can be built with 6-8 hidden states (Habr). Author's note:
        # with large values this could fail with "rows of transmat_ must sum
        # to 1.0", apparently depending on the number of training examples.
        # The remaining parameters control convergence of the EM algorithm
        # (iteration limit, tolerance) and the covariance type of the states.
        # https://habr.com/ru/post/351462/
        try:
            # Author's note: 'tied' raises errors for 4x20 on a small data set.
            hmm = GaussianHMM(n_components=num_components, covariance_type=covar_type, n_iter=n_iter)
            # Train the HMM  https://ogrisel.github.io/scikit-learn.org/sklearn-tutorial/modules/generated/sklearn.hmm.GaussianHMM.html
            print('\nTraining the Hidden Markov Model...')
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                hmm.fit(draws)
            # Print HMM stats
            print('\nMeans and variances:')
            for i in range(hmm.n_components):
                print('\nHidden state', i+1)
                print('Mean =', round(hmm.means_[i][0], 2))
                print('Variance =', round(np.diag(hmm.covars_[i])[0], 2))
            print("\n-Generate data using the HMM model-\n")
            # Generate data using the HMM model
            predicted_data, _ = hmm.sample(predict_count)

            # Convert the sampled values to int.
            predicted_data = np.array(predicted_data, dtype=int)
            print('predicted_data:', predicted_data,", type/shape/ndim ", type(predicted_data),predicted_data.shape,predicted_data.ndim)

            self.print_results(predicted_data, check_draws)
            end = time.time()
            print("затрачено: ",time.strftime('%H:%M:%S', time.gmtime(end - start)))
        except Exception as e:
            # UI boundary: report the failure instead of crashing the window.
            print('HMM:HmmCalculate error: ', e)
            dbg_except()
            self.widget.plainTextEdit.setPlainText(str(e))
    # ts, data = util.load_data("../data/beijing_pm25.csv", columnName="pm2.5")
    # ts, data = util.load_data("../data/pollution.csv", columnName="Ozone")

    train, test = util.divideTrainTest(data)
    print("train shape is", train.shape)
    print("test shape is", test.shape)
    history = [x[0] for x in train]
    predictions = []
    realTestY = []

    for t in range(len(test)):

        model = GaussianHMM(n_components=2)
        model.fit(train)

        output = model.sample(1)

        yhat = output[0][0]

        predictions.append(yhat)
        obs = test[t][0]
        train = np.append(train, obs).reshape(-1, 1)
        realTestY.append(obs)
        print('t:%d, predicted=%f, expected=%f' % (t, yhat, obs))

    realTestY = np.array(test)
    predictions = np.array(predictions).reshape(-1)
    print("pred:", predictions)
    MAE = eval.calcMAE(realTestY, predictions)
    RMSE = eval.calcRMSE(realTestY, predictions)
    MAPE = eval.calcSMAPE(realTestY, predictions)
Beispiel #19
0
mean = np.array([[0.0, 0.0], [0.0, 10.0], [10.0, 0.0]])

# Setting the mean
model_gaussian.means_ = mean

# As emission probability is a 2-D gaussian distribution, thus
# covariance matrix for each state would be a 2-D matrix, thus
# overall the covariance matrix for all the states would be in the
# form of (n_components, 2, 2)
covariance = 0.5 * np.tile(np.identity(2), (3, 1, 1))
model_gaussian.covars_ = covariance

# model.sample returns both observations as well as hidden states
# the first return argument being the observation and the second
# being the hidden states
Z, X = model_gaussian.sample(100)

# Plotting the observations
plt.plot(Z[:, 0],
         Z[:, 1],
         "-o",
         label="observations",
         ms=6,
         mfc="orange",
         alpha=0.7)

# Indicate the state numbers
for i, m in enumerate(mean):
    plt.text(m[0],
             m[1],
             'Component %i' % (i + 1),
# Train a Gaussian HMM on INTC price changes and volume, then plot data
# sampled from it. `yf`, `start_date` and `end_date` come from outside this
# excerpt.
intc = yf.Ticker('INTC').history(start=start_date, end=end_date)

# Take the percentage difference of closing stock prices
diff_percentages = 100.0 * np.diff(intc.Close) / intc.Close[:-1]

# Stack the differences and volume values column-wise for training
# (the last volume value is dropped so both columns have the same length)
training_data = np.column_stack([diff_percentages, intc.Volume[:-1]])

# Create and train Gaussian HMM 
hmm = GaussianHMM(n_components=7, covariance_type='diag', n_iter=1000)
# Warnings raised during fitting are suppressed.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    hmm.fit(training_data)

# Generate data using the HMM model
num_samples = 300 
samples, _ = hmm.sample(num_samples) 

# Plot the difference percentages 
# (column 0 of the samples, matching the column order of training_data)
plt.figure()
plt.title('Difference percentages')
plt.plot(np.arange(num_samples), samples[:, 0], c='black')

# Plot the volume of shares traded
# (column 1 of the samples)
plt.figure()
plt.title('Volume of shares')
plt.plot(np.arange(num_samples), samples[:, 1], c='black')
plt.ylim(ymin=0)

plt.show()
Beispiel #21
0
#%%
from hmmlearn.hmm import GaussianHMM
# Notebook cells: fit an 8-state Gaussian HMM on `dataset_X` (defined outside
# this excerpt), inspect the states, sample from the model, save and plot the
# samples, then refit with other state counts.
model = GaussianHMM(n_components=8, covariance_type="diag", n_iter=1000)
model.fit(dataset_X)
#%%
# Decode the hidden state of every training observation
hidden_states = model.predict(dataset_X)
#%%
for i in range(model.n_components):  # Print every hidden state
    mean = model.means_[i][0]
    variance = np.diag(model.covars_[i])[0]
    print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format(
        (i + 1), mean, variance))
#%%
# Generate data with the HMM
N = 2348
samples, _ = model.sample(N)
plt.plot(samples[:, 0])

#%%
print(samples)
import numpy
# Save the generated samples as CSV
numpy.savetxt("Hours_HMMpred.csv", samples, delimiter=',')
#%%
plt.plot(dataset_X[:N], c='red', label='train')  # Draw the actual and the predicted change in one figure for comparison
plt.plot(samples[:, 0], c='blue', label='Predicted')
plt.legend()
#%%
# Refit with different numbers of hidden states and sample 400 points each
for i in [8, 12, 16, 18, 20]:
    model = GaussianHMM(n_components=i, covariance_type="diag", n_iter=1000)
    model.fit(dataset_X)
    samples, _ = model.sample(400)
Beispiel #22
0
# Report the transition matrix and log-likelihood of the two fitted models
# (`appl_hmm`, `lights_hmm` and their data come from outside this excerpt),
# then sample one day of data from each and plot it.
print('\n\nParameters of appliances model.\n- Transition matrix: \n',
      appl_hmm.transmat_)
logProb = appl_hmm.score(appl_data)
print('\n Log likelihood: \n', round(logProb, 2))
# LIGHTS
print('\n\nParameters of lights model.\n- Transition matrix: \n',
      lights_hmm.transmat_)
# Score the lights model on the lights data; the original reshaped
# `appl_data` here, a copy-paste slip from the appliances section.
logProb = lights_hmm.score(np.reshape(lights_data, [len(lights_data), 1]))
print('\n- Log likelihood: \n', round(logProb, 2))
'''
                                    SAMPLING
'''
# Generate new samples (visible, hidden)

# APPL
X1, Z1 = appl_hmm.sample(
    143)  # 143 is the number of measurement per day (24 hours)
plt.figure(2)
plt.plot(X1)
# Hidden states are scaled by 10 in this plot.
plt.plot(Z1 * 10)
plt.title('Samples generated for appliances')
plt.xlabel('Samples')
# LIGHTS
X2, Z2 = lights_hmm.sample(143)
plt.figure(3)
plt.plot(X2)
plt.plot(Z2)
plt.title('Samples generated for lights')
plt.xlabel('Samples')
'''
                                PROB DISTRIB COMPARISON
'''
Beispiel #23
0
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

# Train a 5-state Gaussian HMM on the daily closing-price change rate and the
# traded volume read from CNY.csv, then plot data generated by the model.

# Load the data from the input file
input_file = 'CNY.csv'
data = np.loadtxt(input_file, delimiter=',')

# Extract the needed columns; the last volume value is dropped so the
# volumes have the same length as the differenced closing values
closing_values = np.array(data[:, 6])
all_volumes = np.array(data[:, 8])
volume_of_shares = all_volumes[:-1]

# Daily rate of change of the closing value, in percent
daily_change = np.diff(closing_values)
diff_percentage = 100.0 * daily_change / closing_values[:-1]

# Combine the change rate with the volume, column-wise
X = np.column_stack((diff_percentage, volume_of_shares))

# Create and train the Gaussian hidden Markov model
print(u"训练高斯隐马尔科夫模型中......")
model = GaussianHMM(
    n_components=5,
    covariance_type='diag',
    n_iter=1000,
)
model.fit(X)

# Generate data with the model; column 0 and column 1 go to separate figures
num_samples = 500
samples, _ = model.sample(num_samples)
sample_axis = np.arange(num_samples)
plt.plot(sample_axis, samples[:, 0], c='black')
plt.figure()
plt.plot(np.arange(num_samples), samples[:, 1], c='red')
plt.show()
Beispiel #24
0
# Notebook cells: train a Gaussian HMM on `transaction`, print the states,
# sample from the model and compare the samples with `feature1` / `feature2`
# (all three inputs are defined outside this excerpt).

# Train the Gaussian HMM
model = GaussianHMM(n_components=8, covariance_type="diag", n_iter=1000)
model.fit(transaction)

#%%
# Print every hidden state
for i in range(model.n_components):
    mean = model.means_[i][0]
    variance = np.diag(model.covars_[i])[0]
    print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format(
        (i + 1), mean, variance))

#%%
# Generate data with the HMM
N = 385
samples, _ = model.sample(N)
plt.plot(samples[:, 0])
#%%
plt.plot(np.arange(N), samples[:, 0])
# N is the number of samples; the title reports the number of hidden
# states, so take it from the model (the original printed N here).
plt.title('Number of components = ' + str(model.n_components))

plt.show()

#%%
plt.plot(feature1[:N], c='red', label='Rise')  # Draw the actual and the predicted change in one figure for comparison
plt.plot(samples[:, 0], c='blue', label='Predicted')
plt.legend()

#%%
plt.plot(feature2[:N], c='red', label='numbers')
plt.plot(samples[:, 1], c='blue', label='Predicted')