def predictions_rand(filename, company, dt1, dt2, num_of_states, test_num, days_future):
    """Simulate future prices with a stored HMM started in a random state.

    Loads a fitted model from ``filename`` with ``joblib``, copies its
    transition matrix, means and covariances into a fresh ``GaussianHMM``
    whose start distribution is uniform over ``num_of_states``, and runs
    ``test_num`` independent random walks from the last historical value.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker passed to ``quotes_historical_yahoo_ochl``.
        dt1: Start of the historical window.
        dt2: End of the historical window.
        num_of_states: Number of hidden states of the stored model.
        test_num: Number of independent simulations.
        days_future: Forecast horizon in calendar days.

    Returns:
        list: The simulated final price of every simulation.
    """
    model = joblib.load(filename)
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    # Index 2 of each quote is the value this code treats as the close.
    close_v = np.array([q[2] for q in quotes])
    last_close = close_v[-1]

    # Ratio of all days to open-market days (251 open days a year). Float
    # literals keep this a true division on Python 2 as well.
    total2active = 364.0 / 251.0
    # range() and sample() need an integer number of trading days.
    days = int(days_future // total2active)
    print(days)

    # Every hidden state is equally likely as the starting state.
    startprob = np.full(num_of_states, 1.0 / num_of_states)

    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # NumPy only accepts seeds below 2**32; sys.maxint is Python-2-only and
    # larger than that on 64-bit builds.
    max_seed = 2 ** 32
    random.seed()

    predictions = []
    for _ in range(test_num):
        rseed = random.randrange(0, max_seed)
        steps, _states = model_2_sample.sample(days, random_state=rseed)
        final_price = last_close
        for step in steps[:, 0]:
            # Skip any step that would push the price to zero or below.
            if final_price + step > 0:
                final_price += step
        predictions.append(final_price)
    return predictions
def hidden_markov_model(hidden_states_count, train, test, time, sample_size, data_name, f):
    """Fit a Gaussian HMM on ``train``, plot the results and log them to ``f``.

    Args:
        hidden_states_count: Number of hidden states of the model.
        train: Observations used to fit the model.
        test: Observations decoded with the fitted model.
        time: Passed through to ``plot_time_series`` for the decoded plot.
        sample_size: Number of samples drawn from the fitted model.
        data_name: Label used in plot titles and in the written report.
        f: Writable text stream that receives the report.
    """
    hmm = GaussianHMM(
        algorithm='viterbi',
        n_components=hidden_states_count,
        covariance_type='diag',
        n_iter=10000,
    )
    hmm.fit(train)

    # Viterbi decoding of the test sequence; the last decoded state selects
    # the transition-matrix row holding the next-step probabilities.
    decoded_states = hmm.predict(test)
    next_step_probs = hmm.transmat_[decoded_states[-1], :]

    # Draw a synthetic (visible, hidden) sequence from the fitted model.
    sampled_obs, sampled_states = hmm.sample(sample_size)

    # Plots
    plot_time_series(test, decoded_states, hidden_states_count, time,
                     data_name + ' - Predict')
    gaussians = get_points(hmm)
    plot_gaussians(train, gaussians, hidden_states_count,
                   data_name + ' - Gaussian Predict')
    plot_time_series(sampled_obs, sampled_states, hidden_states_count,
                     title=data_name + ' - Sample')

    # Report
    f.write(''.join(['\n', data_name, '\n']))
    f.write(''.join(['Transition Matrix:\n', str(hmm.transmat_), '\n']))
    f.write(''.join(['\nNext Step ', str(next_step_probs), '\n']))
    for gaussian in gaussians:
        entry = ''.join([
            '\nHidden Variable NO° ', str(gaussian['no']),
            '\n\tMean: ', str(gaussian['mean']),
            '\n\tSigma: ', str(gaussian['sigma']),
            '\n',
        ])
        f.write(entry)
    f.write('\n#######################################\n')
def ma_er_ke_fu(data_path=r'D:\PycharmProjects\predict_system\马尔科夫\data_hmm.txt'):
    """Fit Gaussian HMMs on column 2 of a CSV file and plot sampled series.

    Args:
        data_path: Header-less CSV file to load. The default is the path the
            function always used, now written as a raw string so its
            backslashes are not read as (invalid) escape sequences.
    """
    warnings.filterwarnings("ignore")
    print('现在进行的算法是马尔科夫')

    # Load the data set and print a quick overview of it.
    df = pd.read_csv(data_path, header=None)
    df.info()  # info() prints its own report and returns None
    print(df.head())
    print(df.tail())

    # Plot the raw series.
    df.iloc[:, 2].plot()

    # The first two columns are dates; column 2 is the data. The model needs
    # a 2-D array, hence the single-column reshape.
    dataset_X = df.iloc[:, 2].values.reshape(-1, 1)
    print(dataset_X.shape)

    # Build and train the HMM.
    model = GaussianHMM(n_components=4, covariance_type="diag", n_iter=1000)
    model.fit(dataset_X)

    # Print every hidden state.
    for i in range(model.n_components):
        mean = model.means_[i][0]
        variance = np.diag(model.covars_[i])[0]
        print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format(
            (i + 1), mean, variance))

    # Generate data with the fitted model.
    N = 1000
    samples, _ = model.sample(N)
    plt.plot(samples[:, 0])

    # Refit with other numbers of hidden states and plot each result.
    for i in [8, 12, 16, 18, 20]:
        model = GaussianHMM(n_components=i, covariance_type="diag", n_iter=1000)
        model.fit(dataset_X)
        samples, _ = model.sample(1000)
        plt.plot(samples[:, 0])
        plt.title('hidden state N={}'.format(i))
        plt.show()
def predict_one(filename, company, dt1, dt2, num_of_states, days_future, tr_prob):
    """Simulate one price path starting in the most likely current state.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker passed to the quote and real-price helpers.
        dt1: Start of the historical window.
        dt2: End of the historical window, also passed to
            ``getrealprice_series``.
        num_of_states: Number of hidden states of the stored model.
        days_future: Horizon passed to ``getrealprice_series``.
        tr_prob: Unused here; kept so the signature stays unchanged.

    Returns:
        numpy.ndarray: One predicted price per element of the real-price
        series; element 0 equals the first real price. Empty when the
        real-price series is empty.
    """
    model = joblib.load(filename)
    rp = getrealprice_series(company, dt2, days_future)
    days = rp.size
    if days == 0:
        # No real prices to align with, so there is nothing to predict.
        return np.zeros(0)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])
    X = np.column_stack([np.diff(close_v)])

    # Posterior over hidden states at the last historical observation.
    lstate_prob = model.predict_proba(X)[-1]

    # Start in the most likely current state. With a single state argmax is
    # 0, so no separate one-state branch is needed.
    startprob = np.zeros(num_of_states)
    startprob[lstate_prob.argmax()] = 1.0

    # Prepare the model for sampling.
    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Make sure to randomize the samples. max_int_value is a module-level
    # name defined outside this function.
    random.seed()
    rseed = random.randrange(0, max_int_value)
    steps, _states = model_2_sample.sample(days, random_state=rseed)

    # Day 0 is the real price; day i adds sampled steps 1..i (sample 0 is
    # not used, exactly as before).
    return np.cumsum(np.concatenate(([float(rp[0])], steps[1:, 0])))
def hmm_gen_TS(N, hmm_n=4):
    """Generate ``N`` standardised series of length ``T`` from fitted HMMs.

    Each series comes from refitting one ``GaussianHMM`` on a randomly chosen
    row of the module-level ``data_all`` and sampling ``T`` points from it.
    ``T``, ``data_all`` and the printer ``p`` are defined outside this block.

    Args:
        N: Number of series to generate.
        hmm_n: Number of hidden states of the HMM.

    Returns:
        numpy.ndarray: Array of shape ``(N, T)``; each row has its mean
        subtracted and is divided by its standard deviation.
    """
    generated = np.zeros([N, T])
    hmm = GaussianHMM(n_components=hmm_n, covariance_type="diag", n_iter=1000)
    for idx in range(N):
        if not idx % 10:
            p('generating {0}th sample by HMM'.format(idx))
        # Pick a random real series and refit the model on it.
        row = np.random.randint(0, data_all.shape[0])
        real_series = np.array(data_all.loc[row, '0':])
        hmm.fit(np.column_stack([real_series]))
        sampled, _states = hmm.sample(T)
        first_feature = sampled[:, 0]
        centred = first_feature - np.mean(first_feature)
        generated[idx, :] = centred / np.std(first_feature)
    return generated
def bench_gaussian_hmm(size):
    """Time sampling, fitting and decoding of a two-state Gaussian HMM.

    Args:
        size: Number of observations to sample and then fit on.
    """
    title = "benchmarking Gaussian HMM on a sample of size {0}".format(size)
    print(title.center(36, " "))

    # The generating model has two states, so it must be built with two
    # components and given start/transition parameters before sampling.
    ghmm = GaussianHMM(n_components=2)
    ghmm.startprob_ = [0.5, 0.5]
    ghmm.transmat_ = [[0.5, 0.5], [0.5, 0.5]]
    ghmm.means_ = [[42], [24]]
    ghmm.covars_ = [[1], [1]]

    with timed_step("generating sample"):
        sample, _states = ghmm.sample(size)

    with timed_step("fitting"):
        # fit() takes the 2-D observation array itself, as the other
        # GaussianHMM calls in this file do, not a list wrapping it.
        fit = GaussianHMM(n_components=2).fit(sample)

    with timed_step("estimating states"):
        fit.predict(sample)
class HMMGoalModel(object):
    """Gaussian HMM over trajectories that remembers their final states.

    Attributes set by ``__init__``:
        hmm: The fitted ``GaussianHMM``.
        final_states: Decoded hidden state of the last step of every
            training sequence.
        T: Integer mean of the sequence lengths; default sample length.
        n_components: Number of hidden states of ``hmm``.
    """

    def __init__(self, per_data, per_lens=None, n_states=None):
        """Fit the model.

        Args:
            per_data: Array of observations; data with more than two
                dimensions is flattened to ``(-1, last_dim)``.
            per_lens: Length of every sequence. Defaults to ``len`` of each
                element of ``per_data``, taken before flattening.
            n_states: Number of hidden states. When ``None``, candidates with
                2, 4, 6, 8 and 10 states are fitted and the one with the
                lowest ``aic`` value is kept.
        """
        if per_lens is None:
            per_lens = list(map(len, per_data))
        if len(per_data.shape) > 2:
            per_data = per_data.reshape(-1, per_data.shape[-1])

        if n_states is None:
            candidates = [GaussianHMM(n_components=c) for c in (2, 4, 6, 8, 10)]
            # Fit eagerly: a bare map() is lazy on Python 3 and would leave
            # every candidate unfitted.
            for candidate in candidates:
                candidate.fit(per_data, per_lens)
            # min() with a key never compares the model objects themselves,
            # which sorting (score, model) tuples does on tied scores.
            self.hmm = min(candidates,
                           key=lambda g: aic(g, per_data, per_lens))
        else:
            self.hmm = GaussianHMM(n_components=n_states)
            self.hmm.fit(per_data, per_lens)

        ll = self.hmm.score(per_data, per_lens)
        # One formatted string prints identically on Python 2 and 3.
        print("Goal HMM n_components %s Log likelihood %s"
              % (self.hmm.n_components, ll))

        # Index of the last observation of every sequence in the stacked data.
        upper_idxs = np.cumsum(per_lens, dtype=int) - 1
        self.final_states = np.array(
            self.hmm.predict(per_data, per_lens))[upper_idxs]
        print(self.final_states)
        self.T = int(np.mean(per_lens))
        self.n_components = self.hmm.n_components

    def is_success(self, per_trj):
        """Return whether ``per_trj`` ends in a known final state."""
        per_trj = np.array(per_trj)
        states = self.hmm.predict(per_trj)
        final_state = states[-1]
        return final_state in self.final_states

    def sample(self, t=None):
        """Sample ``t`` steps from the model (``self.T`` when ``t`` is None)."""
        t = self.T if t is None else t
        return self.hmm.sample(t)
def fitHMM(Q, nSamples):
    """Load or train a two-state Gaussian HMM and describe ``Q`` with it.

    A pickled model is loaded from a fixed path when that file exists;
    otherwise a model is fitted to ``Q`` and pickled there. The model is
    stored in the module-level ``model``.

    Args:
        Q: One-dimensional sequence of observations.
        nSamples: Number of samples to draw from the model.

    Returns:
        tuple: ``(hidden_states, mus, sigmas, P, Prob, samples)`` - decoded
        state per observation, state means, state standard deviations,
        transition matrix, log-likelihood of ``Q`` and the result of
        ``model.sample(nSamples)``. The first four are reordered so that
        the first state has the lower mean.
    """
    global model
    cache_path = '/home/khaled/saied_ws/src/khaled_model_hmm/src/train.pkl'

    if os.path.isfile(cache_path):
        # Reuse the stored model. NOTE(review): unpickling is only safe for
        # files this program wrote itself.
        with open(cache_path, 'rb') as fid:
            model = cPickle.load(fid)
        print("loaded")
    else:
        print("training")
        # Fit the Gaussian HMM to Q and store it for later runs.
        model = GaussianHMM(n_components=2, n_iter=1000).fit(
            np.reshape(Q, [len(Q), 1]))
        with open(cache_path, 'wb') as fid:
            cPickle.dump(model, fid)

    observations = np.reshape(Q, [len(Q), 1])

    # Classify each observation as state 0 or 1.
    hidden_states = model.predict(observations)

    # Parameters of the Gaussian HMM.
    mus = np.array(model.means_)
    sigmas = np.sqrt(np.array([np.diag(model.covars_[k]) for k in (0, 1)]))
    P = np.array(model.transmat_)

    # Log-likelihood of Q under the model.
    Prob = model.score(observations)

    # Draw nSamples from the model.
    samples = model.sample(nSamples)
    print(model.transmat_)
    print('score', Prob)
    print('hidden states', len(hidden_states))

    # Put the lower-mean state first if it is not already.
    if mus[0] > mus[1]:
        mus = mus[::-1]
        sigmas = sigmas[::-1]
        P = P[::-1, ::-1]
        hidden_states = 1 - hidden_states

    return hidden_states, mus, sigmas, P, Prob, samples
def main():
    """Fit a Gaussian HMM on house prices and plot sampled against real prices.

    Reads ``'Price and rate and houses sold.csv'``, trains a 15-state
    tied-covariance HMM on the columns (percentage price change, price),
    samples ``len(data) + 12`` points and plots the sampled price column
    next to the real prices.
    """
    df = pd.read_csv('Price and rate and houses sold.csv', parse_dates=True)
    # NOTE(review): the original called df.drop(df.index[0]) and discarded
    # the result, so the first row was never dropped. That no-op is removed
    # here; assign the result back if dropping the row is really wanted.
    prices = np.array(df['price'])

    # Percentage change against the previous row; the first row gets 0.
    diff_percentages = 100.0 * np.diff(prices) / prices[:-1]
    diff_percentages = np.append([0], diff_percentages)
    data = np.column_stack([diff_percentages, prices])

    # random_state=False was treated as the integer seed 0; say so directly.
    hmm = GaussianHMM(n_components=15, covariance_type='tied', n_iter=100000,
                      algorithm='viterbi', random_state=0)
    hmm.fit(data)

    pred_count = 12
    num_samples = len(data)
    samples, _ = hmm.sample(num_samples + pred_count)
    print(samples)

    plt.figure()
    plt.xlabel('Days starting from Jan 1990')
    plt.ylabel('House prices predicted and actual')
    plt.title('Days vs Prices')
    plt.plot(np.arange(num_samples + pred_count), samples[:, 1], 'r--',
             np.arange(num_samples), prices[:num_samples], 'b-')
    # 'ymin' is no longer accepted by Matplotlib; 'bottom' is the keyword.
    plt.ylim(bottom=0)
    plt.show()
def predictions_mls(filename, company, dt1, dt2, num_of_states, test_num, days_future, tr_prob):
    """Average many simulated price paths and save their statistics to CSV.

    Every simulation starts in the most likely current hidden state of the
    stored model and at the first real price. The mean, variance and median
    over ``test_num`` paths are written, together with the real prices, the
    errors and ``tr_prob``, to
    ``./sims_final/Predictions_<company>_States_<n>_stats.csv``.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker passed to the quote and real-price helpers.
        dt1: Start of the historical window.
        dt2: End of the historical window, also passed to
            ``getrealprice_series``.
        num_of_states: Number of hidden states of the stored model.
        test_num: Number of simulated paths.
        days_future: Horizon passed to ``getrealprice_series``.
        tr_prob: Value written as a constant column of the CSV.

    Returns:
        None
    """
    model = joblib.load(filename)
    rp = getrealprice_series(company, dt2, days_future)
    days = rp.size

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])
    X = np.column_stack([np.diff(close_v)])

    # Posterior over hidden states at the last historical observation.
    lstate_prob = model.predict_proba(X)[-1]

    # Start in the most likely current state. With a single state argmax is
    # 0, so no separate one-state branch is needed.
    startprob = np.zeros(num_of_states)
    startprob[lstate_prob.argmax()] = 1.0

    # Prepare the model for sampling.
    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    # Make sure to randomize the samples. max_int_value is a module-level
    # name defined outside this function.
    random.seed()

    allpredictions = np.zeros((test_num, days))
    for test in range(test_num):
        # Sample inside the loop so that no unused path is drawn at the end.
        rseed = random.randrange(0, max_int_value)
        steps, _states = model_2_sample.sample(days, random_state=rseed)
        # Day 0 is the real price; day i adds sampled steps 1..i.
        allpredictions[test] = np.cumsum(
            np.concatenate(([float(rp[0])], steps[1:, 0])))

    predictions = allpredictions.mean(axis=0)
    predictions_var = allpredictions.var(axis=0)
    predictions_median = np.median(allpredictions, axis=0)
    errors = predictions - rp
    tr_prob_vector = np.full((predictions.size), tr_prob)
    data = [predictions, rp, errors, tr_prob_vector, predictions_var,
            predictions_median]
    err_final = errors[-1]

    print("Start Price: ", rp[0], "Avg. Prediction: ", str(num_of_states),
          "states:", predictions[-1], " Real Price:", rp[-1])
    print(" Error end of predictions:", err_final, "Delta Start-End:",
          rp[0] - rp[-1], "\n")

    out_dir = './sims_final'
    # np.savetxt does not create missing directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    fname = "Predictions_" + str(company) + "_States_" + str(num_of_states) + "_stats.csv"
    fname = os.path.join(out_dir, fname)
    np.savetxt(fname, data, delimiter=",")
    return
# from matplotlib.finance import quotes_historical_yahoo_ochl
from hmmlearn.hmm import GaussianHMM

# Download the INTC quotes and keep the last six columns of every row.
params = {'tickers': 'INTC', 'begin': '1994-04-05', 'end': '2015-07-03'}
r = requests.get('https://quantprice.herokuapp.com/api/v1.1/scoop/period', params=params)
data = r.json()
quotes = np.array(data["datatable"]["data"])[:, -6:]

# Get quotes from Yahoo finance
# quotes = quotes_historical_yahoo_ochl("INTC",datetime.date(1994, 4, 5), datetime.date(2015, 7, 3))

# Extract the required values
dates = quotes[:, 0]
closing_values = quotes[:, 4]
volume_of_shares = quotes[1:, 5]

# Take diff of closing values and compute the rate of change
diff_percentage = 100.0 * np.diff(closing_values) / closing_values[:-1]
dates = dates[1:]

# Stack the percentage diff and volume values column-wise for training,
# dropping rows that have a missing value
X = np.column_stack([diff_percentage, volume_of_shares])
X = [x for x in X if x[0] is not None and x[1] is not None]

# Create and train Gaussian HMM
# (parenthesised single-argument print works on Python 2 and Python 3)
print("\nTraining HMM....")
model = GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
model.fit(X)

# Generate data using model
# NOTE(review): sample() returns (observations, hidden states), so `samples`
# holds the hidden-state sequence and that is what gets plotted - confirm
# this is intended rather than the generated observations in `a`.
num_samples = 500
a, samples = model.sample(num_samples)
plt.plot(np.arange(num_samples), samples, c='black')
plt.show()
def predictions_mls(filename, company, refcompany, dt1, dt2, num_of_states, test_num):
    """Simulate a year of prices with a two-ticker HMM and save the mean path.

    The stored model is evaluated on the day-to-day differences of
    ``company`` and ``refcompany`` to find the most likely current hidden
    state; ``test_num`` price paths are then simulated from the last value
    of ``company``. The mean path is written to
    ``./sims3/Year_of_predictions_<company>_States_<n>_adv.csv``.

    ``total2active``, ``yr`` and ``max_int_value`` are module-level names
    defined outside this function.

    Args:
        filename: Path of the joblib-serialised fitted model.
        company: Ticker whose price is simulated.
        refcompany: Second ticker used as the model's second feature.
        dt1: Start of the historical window.
        dt2: End of the historical window.
        num_of_states: Number of hidden states of the stored model.
        test_num: Number of simulated paths.

    Returns:
        tuple: Three arrays with one value per simulation, taken from
        columns ``363``, ``363 // 4`` and ``363 // 36`` of the simulated
        paths.
    """
    days_future = 365
    model = joblib.load(filename)

    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)
    close_v = np.array([q[2] for q in quotes])
    last_close = close_v[-1]
    diff = np.diff(close_v)

    # Differences of the reference ticker.
    quotes2 = quotes_historical_yahoo_ochl(refcompany, dt1, dt2)
    close_v2 = np.array([q[2] for q in quotes2])
    diff2 = np.diff(close_v2)

    # Left-pad the first series with zeros by the length difference.
    delta = abs(diff2.shape[0] - diff.shape[0])
    diff0 = np.pad(diff, (delta, 0), mode='constant', constant_values=0)
    X = np.column_stack([diff0, diff2])

    # Posterior over hidden states at the last historical observation.
    lstate_prob = model.predict_proba(X)[-1]

    days = int(days_future // total2active)  # 251 open market days in a year
    print(days, strftime("%Y-%m-%d %H:%M:%S", gmtime()))  # debugging purposes

    # Start in the most likely current state. With a single state argmax is
    # 0, so no separate one-state branch is needed.
    startprob = np.zeros(num_of_states)
    startprob[lstate_prob.argmax()] = 1.0

    model_2_sample = GaussianHMM(n_components=num_of_states, covariance_type="full")
    model_2_sample.startprob_ = startprob
    model_2_sample.transmat_ = model.transmat_
    model_2_sample.means_ = model.means_
    model_2_sample.covars_ = model.covars_

    random.seed()
    allpredictions = np.zeros((test_num, yr))
    for test in range(test_num):
        # Sample inside the loop so that no unused path is drawn at the end.
        rseed = random.randrange(0, max_int_value)
        steps, _states = model_2_sample.sample(days, random_state=rseed)

        final_price = last_close
        j = 0
        for i in range(days):
            # Skip any step that would push the price to zero or below.
            if (final_price + steps[i][0]) > 0:
                final_price += steps[i][0]
            if j > 1 and i % 5 == 0:
                # Every fifth trading day fills three calendar slots.
                allpredictions[test][j] = final_price
                allpredictions[test][j + 1] = final_price
                allpredictions[test][j + 2] = final_price
                j = j + 3
            else:
                allpredictions[test][j] = final_price
                j = j + 1
        # Carry the last price forward over the remaining slots.
        allpredictions[test, j:] = final_price

    predictions_year = allpredictions.mean(axis=0)
    print("Avg. Prediction: ", predictions_year[-1])

    out_dir = './sims3'
    # np.savetxt does not create missing directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    fname = "Year_of_predictions_" + str(company) + "_States_" + str(
        num_of_states) + "_adv.csv"
    fname = os.path.join(out_dir, fname)
    np.savetxt(fname, predictions_year, delimiter=",")

    # Floor division keeps the column indices integers on Python 3 too.
    return (allpredictions[:, days_future - 2],
            allpredictions[:, (days_future - 2) // 4],
            allpredictions[:, (days_future - 2) // 36])
# Hand-specified 4-state model: start distribution, transition matrix,
# 2-D state means and one 0.5 * identity covariance per state.
startprob = np.array([0.6, 0.3, 0.1, 0.0])
transmat = np.array([
    [0.7, 0.2, 0.0, 0.1],
    [0.3, 0.5, 0.2, 0.0],
    [0.0, 0.3, 0.5, 0.2],
    [0.2, 0.0, 0.2, 0.6],
])
means = np.array([
    [0.0, 0.0],
    [0.0, 11.0],
    [9.0, 10.0],
    [11.0, -1.0],
])
covars = np.tile(0.5 * np.identity(2), (4, 1, 1))

# Assign the parameters directly instead of fitting them.
model = GaussianHMM(n_components=4, covariance_type="full")
model.startprob_ = startprob
model.transmat_ = transmat
model.means_ = means
model.covars_ = covars

# Draw five observations together with their hidden states.
X, state_sequence = model.sample(n_samples=5)

# Plot the observations and label every state mean.
plt.plot(X[:, 0], X[:, 1], ".-", label="observations", ms=6,
         mfc="orange", alpha=0.7)
for number, centre in enumerate(means, start=1):
    plt.text(centre[0], centre[1], 'Component %i' % number, size=12,
             horizontalalignment='center',
             bbox=dict(alpha=.7, facecolor='w'))
plt.legend(loc='best')
plt.show()
# Read the comma-separated input file.
data = np.loadtxt('data_1D.txt', delimiter=',')

# Train on the third column only, kept as a single-column 2-D array.
X = data[:, [2]]

# Five-state Gaussian HMM with diagonal covariances.
num_components = 5
hmm = GaussianHMM(n_components=num_components, covariance_type='diag',
                  n_iter=1000)

# Fit the model.
print('\nTraining the Hidden Markov Model...')
hmm.fit(X)

# Report the first-feature mean and variance of every hidden state.
print('\nMeans and variances:')
state_params = zip(hmm.means_, hmm.covars_)
for state_no, (state_mean, state_covar) in enumerate(state_params, start=1):
    print('\nHidden state', state_no)
    print('Mean =', round(state_mean[0], 2))
    print('Variance =', round(np.diag(state_covar)[0], 2))

# Sample from the fitted model and plot the generated series.
num_samples = 1200
generated_data, _ = hmm.sample(num_samples)
plt.plot(np.arange(num_samples), generated_data[:, 0], c='black')
plt.title('Generated data')
plt.show()
from hmmlearn.hmm import GaussianHMM
from Slicing_time_series_data import read_data

# hmmlearn implements three HMM classes, split by whether the observations
# are continuous or discrete: GaussianHMM and GMMHMM model continuous
# observations, MultinomialHMM models discrete ones.

# Load the data.
data = np.loadtxt('data_1D.txt', delimiter=',')

# Train on the third column.
x = np.column_stack([data[:, 2]])

# Create the GaussianHMM: n_components=5 hidden states,
# covariance_type='diag' (a diagonal covariance matrix per state) and
# n_iter as the maximum number of iterations.
num_components = 5
hmm = GaussianHMM(n_components=num_components, covariance_type='diag', n_iter=1000)

# Train the HMM.
print("\n正在训练隐马尔科夫模型....")
hmm.fit(x)

# Print the mean and variance of every HMM state.
print("\n均值和方差:")
for i in range(hmm.n_components):
    print('\n隐状态', i + 1)
    print('均值 = ', round(hmm.means_[i][0], 2))
    print("方差 = ", round(np.diag(hmm.covars_[i])[0], 2))

# Generate 1200 samples from the trained model and plot them.
num_samples = 1200
generated_data, _ = hmm.sample(num_samples)  # "_" holds the states, unused
plt.plot(np.arange(num_samples), generated_data[:, 0], c='red')
# Title typo fixed: it used to read 'Gnenerate data'.
plt.title('Generated data')
plt.show()
# State means of the three 2-D Gaussian emissions.
mean = np.array([
    [0.0, 0.0],
    [0.0, 10.0],
    [10.0, 0.0],
])
model_gaussian.means_ = mean

# Each state emits a 2-D Gaussian, so every state has a 2x2 covariance and
# the stacked array has shape (n_components, 2, 2).
covariance = np.tile(0.5 * np.identity(2), (3, 1, 1))
model_gaussian.covars_ = covariance

# sample() returns the observations first and the hidden states second.
Z, X = model_gaussian.sample(100)

# Plot the observations.
plt.plot(Z[:, 0], Z[:, 1], "-o", label="observations", ms=6,
         mfc="orange", alpha=0.7)

# Mark every state mean with its component number.
for number, centre in enumerate(mean, start=1):
    plt.text(centre[0], centre[1], 'Component %i' % number, size=17,
             horizontalalignment='center',
             bbox=dict(alpha=.7, facecolor='w'))
plt.legend(loc='best')
plt.show()
def hmm_calculate(self):
    """Fit a Gaussian HMM on the selected draws and print sampled predictions.

    Reads the training range, check range, iteration count, prediction count
    and covariance type from ``self.widget``, loads both draw ranges through
    ``self.db.get_draws_balls_numpy``, fits a ``GaussianHMM`` on the training
    draws, samples rows from it and passes them, with the check draws, to
    ``self.print_results``. Shows a warning box and returns early when the
    training range is too short or when either range is empty. Any exception
    raised while fitting or sampling is printed and shown in
    ``self.widget.plainTextEdit``.
    """
    # Prepare the selected draws.
    started_at = time.time()
    print("Начинаем считать в ", datetime.datetime.fromtimestamp(started_at).strftime("%d-%m-%y %H:%M:%S"))

    ui = self.widget
    first_draw = ui.spinBoxFromDraw.value()
    last_draw = ui.spinBoxToDraw.value()
    check_first = ui.spinBoxCheckFromDraw.value()
    check_last = ui.spinBoxCheckToDraw.value()
    max_iterations = ui.spinBoxIterations.value()

    if (last_draw - first_draw) <= 3:
        QMessageBox.warning(self, 'Предупреждение', "Обучающих примеров недостаточно", QMessageBox.Cancel)
        return

    predict_count = ui.spinBoxPredictCount.value()
    draws = self.db.get_draws_balls_numpy(first_draw, last_draw)
    if draws.size == 0:
        QMessageBox.warning(self, 'Предупреждение', "Обучающих примеров нет", QMessageBox.Cancel)
        return

    check_draws = self.db.get_draws_balls_numpy(check_first, check_last)
    print("\n-checkDraws-\n")
    print(check_draws)
    print(check_draws.shape)
    if check_draws.size == 0:
        QMessageBox.warning(self, 'Предупреждение', "Проверочных примеров нет", QMessageBox.Cancel)
        return

    # Create a Gaussian HMM. Start from 7 hidden states and shrink to
    # len(draws) / 2 - 1 (never below 1) when there are few training draws.
    n_states = 7
    if (len(draws) / 2) < n_states:
        n_states = max(1, int(len(draws) / 2 - 1))
    print("num_components=", n_states)

    covar_type = str(ui.comboBoxCovarianceType.currentText())
    print("используем ", covar_type)

    # n_components defines the number of hidden states. Reasonably good
    # models can be built with 6-8 hidden states (Habr). With large values
    # the error "rows of transmat_ must sum to 1.0" could appear here -
    # apparently depending on the number of training examples.
    # The other parameters control convergence of the EM algorithm: they
    # limit the number of iterations, set the tolerance and define the type
    # of the states' covariance parameters.
    # https://habr.com/ru/post/351462/
    try:
        # "tied" gives errors for 4x20 with a small data set.
        hmm = GaussianHMM(n_components=n_states, covariance_type=covar_type, n_iter=max_iterations)

        # Train the HMM
        # https://ogrisel.github.io/scikit-learn.org/sklearn-tutorial/modules/generated/sklearn.hmm.GaussianHMM.html
        print('\nTraining the Hidden Markov Model...')
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            hmm.fit(draws)

        # Print HMM stats
        print('\nMeans and variances:')
        for state in range(hmm.n_components):
            print('\nHidden state', state + 1)
            print('Mean =', round(hmm.means_[state][0], 2))
            print('Variance =', round(np.diag(hmm.covars_[state])[0], 2))

        print("\n-Generate data using the HMM model-\n")
        # Generate data using the HMM model
        predicted_data, _ = hmm.sample(predict_count)
        predicted_data = np.array(predicted_data, dtype=int)  # convert to int
        print('predicted_data:', predicted_data, ", type/shape/ndim ", type(predicted_data), predicted_data.shape, predicted_data.ndim)
        self.print_results(predicted_data, check_draws)

        finished_at = time.time()
        print("затрачено: ", time.strftime('%H:%M:%S', time.gmtime(finished_at - started_at)))
    except Exception as e:
        print('HMM:HmmCalculate error: ', e)
        dbg_except()
        ui.plainTextEdit.setPlainText(str(e))
    # end HmmCalculate
# ts, data = util.load_data("../data/beijing_pm25.csv", columnName="pm2.5")
# ts, data = util.load_data("../data/pollution.csv", columnName="Ozone")
train, test = util.divideTrainTest(data)
print("train shape is", train.shape)
print("test shape is", test.shape)

history = [x[0] for x in train]
predictions = []
realTestY = []
# Walk forward: refit on everything seen so far, draw one sample as the
# prediction, then add the real observation to the training data.
for t in range(len(test)):
    model = GaussianHMM(n_components=2)
    model.fit(train)
    output = model.sample(1)
    # output[0] is the (1, 1) array of sampled observations; take the scalar
    # so that '%f' and the predictions list receive a plain number.
    yhat = output[0][0][0]
    predictions.append(yhat)
    obs = test[t][0]
    train = np.append(train, obs).reshape(-1, 1)
    realTestY.append(obs)
    print('t:%d, predicted=%f, expected=%f' % (t, yhat, obs))

# Use the collected observations (1-D), not the (n, 1) `test` array, so the
# targets have the same shape as the flattened predictions.
realTestY = np.array(realTestY).reshape(-1)
predictions = np.array(predictions).reshape(-1)
print("pred:", predictions)
MAE = eval.calcMAE(realTestY, predictions)
RMSE = eval.calcRMSE(realTestY, predictions)
# NOTE(review): this variable is named MAPE but holds calcSMAPE's result.
MAPE = eval.calcSMAPE(realTestY, predictions)
mean = np.array([[0.0, 0.0], [0.0, 10.0], [10.0, 0.0]]) # Setting the mean model_gaussian.means_ = mean # As emission probability is a 2-D gaussian distribution, thus # covariance matrix for each state would be a 2-D matrix, thus # overall the covariance matrix for all the states would be in the # form of (n_components, 2, 2) covariance = 0.5 * np.tile(np.identity(2), (3, 1, 1)) model_gaussian.covars_ = covariance # model.sample returns both observations as well as hidden states # the first return argument being the observation and the second # being the hidden states Z, X = model_gaussian.sample(100) # Plotting the observations plt.plot(Z[:, 0], Z[:, 1], "-o", label="observations", ms=6, mfc="orange", alpha=0.7) # Indicate the state numbers for i, m in enumerate(mean): plt.text(m[0], m[1], 'Component %i' % (i + 1),
# Download the INTC price history for the configured date range.
intc = yf.Ticker('INTC').history(start=start_date, end=end_date)

# Take the percentage difference of closing stock prices
diff_percentages = 100.0 * np.diff(intc.Close) / intc.Close[:-1]

# Stack the differences and volume values column-wise for training
training_data = np.column_stack([diff_percentages, intc.Volume[:-1]])

# Create and train Gaussian HMM
hmm = GaussianHMM(n_components=7, covariance_type='diag', n_iter=1000)
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    hmm.fit(training_data)

# Generate data using the HMM model
num_samples = 300
samples, _ = hmm.sample(num_samples)

# Plot the difference percentages
plt.figure()
plt.title('Difference percentages')
plt.plot(np.arange(num_samples), samples[:, 0], c='black')

# Plot the volume of shares traded
plt.figure()
plt.title('Volume of shares')
plt.plot(np.arange(num_samples), samples[:, 1], c='black')
# 'ymin' is no longer accepted by Matplotlib; 'bottom' is the keyword.
plt.ylim(bottom=0)
plt.show()
#%%
from hmmlearn.hmm import GaussianHMM

# Train an 8-state Gaussian HMM with diagonal covariances on dataset_X.
model = GaussianHMM(n_components=8, covariance_type="diag", n_iter=1000)
model.fit(dataset_X)
#%%
hidden_states = model.predict(dataset_X)
#%%
# Print the first-feature mean and variance of every hidden state.
state_params = zip(model.means_, model.covars_)
for state_no, (state_mean, state_covar) in enumerate(state_params, start=1):
    mean = state_mean[0]
    variance = np.diag(state_covar)[0]
    print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format(
        state_no, mean, variance))
#%%
# Generate data with the fitted HMM.
N = 2348
samples, _ = model.sample(N)
plt.plot(samples[:, 0])
#%%
print(samples)
import numpy
numpy.savetxt("Hours_HMMpred.csv", samples, delimiter=',')
#%%
# Draw the real and the generated series in one figure for comparison.
plt.plot(dataset_X[:N], c='red', label='train')
plt.plot(samples[:, 0], c='blue', label='Predicted')
plt.legend()
#%%
# Refit with other numbers of hidden states and sample from each model.
for i in (8, 12, 16, 18, 20):
    model = GaussianHMM(n_components=i, covariance_type="diag", n_iter=1000)
    model.fit(dataset_X)
    samples, _ = model.sample(400)
# APPLIANCES: transition matrix and log-likelihood of the appliances model.
print('\n\nParameters of appliances model.\n- Transition matrix: \n', appl_hmm.transmat_)
logProb = appl_hmm.score(appl_data)
print('\n Log likelihood: \n', round(logProb, 2))

# LIGHTS
print('\n\nParameters of lights model.\n- Transition matrix: \n', lights_hmm.transmat_)
# Score the lights model on the lights data; the original reshaped and
# scored appl_data here.
logProb = lights_hmm.score(np.reshape(lights_data, [len(lights_data), 1]))
print('\n- Log likelihood: \n', round(logProb, 2))

# ---- SAMPLING ----
# Generate new samples (visible, hidden)

# APPL
X1, Z1 = appl_hmm.sample(143)  # 143 is the number of measurement per day (24 hours)
plt.figure(2)
plt.plot(X1)
plt.plot(Z1 * 10)
plt.title('Samples generated for appliances')
plt.xlabel('Samples')

# LIGHTS
X2, Z2 = lights_hmm.sample(143)
plt.figure(3)
plt.plot(X2)
plt.plot(Z2)
plt.title('Samples generated for lights')
plt.xlabel('Samples')

# ---- PROB DISTRIB COMPARISON ----
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

# Load the data from the input file.
input_file = 'CNY.csv'
data = np.loadtxt(input_file, delimiter=',')

# Pull out the columns that are needed: column 6 is used as the closing
# value and column 8 as the volume (without its last row).
closing_values = data[:, 6].copy()
volume_of_shares = data[:-1, 8].copy()

# Day-to-day percentage change of the closing value.
closing_deltas = np.diff(closing_values)
diff_percentage = 100.0 * closing_deltas / closing_values[:-1]

# Combine the change rate with the volume, one column each.
X = np.column_stack((diff_percentage, volume_of_shares))

# Create and train the Gaussian hidden Markov model.
print(u"训练高斯隐马尔科夫模型中......")
model = GaussianHMM(n_components=5, covariance_type='diag', n_iter=1000)
model.fit(X)

# Generate data with the model and plot both features.
num_samples = 500
samples, _ = model.sample(num_samples)
sample_axis = np.arange(num_samples)
plt.plot(sample_axis, samples[:, 0], c='black')
plt.figure()
plt.plot(np.arange(num_samples), samples[:, 1], c='red')
plt.show()
# Train the Gaussian HMM.
model = GaussianHMM(n_components=8, covariance_type="diag", n_iter=1000)
model.fit(transaction)
#%%
# Print every hidden state.
for i in range(model.n_components):
    mean = model.means_[i][0]
    variance = np.diag(model.covars_[i])[0]
    print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format(
        (i + 1), mean, variance))
#%%
# Generate data with the fitted HMM.
N = 385
samples, _ = model.sample(N)
plt.plot(samples[:, 0])
#%%
plt.plot(np.arange(N), samples[:, 0])
# The title reports the number of hidden states; N is the sample count.
plt.title('Number of components = ' + str(model.n_components))
plt.show()
#%%
# Draw the real and the generated first feature in one figure for comparison.
plt.plot(feature1[:N], c='red', label='Rise')
plt.plot(samples[:, 0], c='blue', label='Predicted')
plt.legend()
#%%
plt.plot(feature2[:N], c='red', label='numbers')
plt.plot(samples[:, 1], c='blue', label='Predicted')