def bench_gaussian_hmm(size):
    """Run the sample / fit / decode stages of a Gaussian HMM under ``timed_step``.

    A generator model with hand-set means and covariances draws ``size``
    observations; a fresh two-component model is fitted to that sample and
    then used to decode it.
    """
    banner = "benchmarking Gaussian HMM on a sample of size {0}".format(size)
    print(banner.center(36))

    # Generator whose two states are far apart (means 42 and 24, unit variance).
    generator = GaussianHMM()
    generator.means_ = [[42], [24]]
    generator.covars_ = [[1], [1]]

    with timed_step("generating sample"):
        sample, _ = generator.sample(size)

    with timed_step("fitting"):
        fitted = GaussianHMM(n_components=2).fit([sample])

    with timed_step("estimating states"):
        fitted.predict(sample)
def mainHMM(filePrefix):
    """Fit a 4-state diagonal Gaussian HMM on one route and print the results.

    ``loadOneRoute(filePrefix)`` supplies the train/test matrices and their
    sequence lengths; only the first five columns are used as features.
    Every print passes a single argument so the output is the same under
    Python 2 and Python 3 (``print("a", b)`` prints a tuple on Python 2).
    """
    X_train, length_train, X_test, length_test = loadOneRoute(filePrefix)

    # Run Gaussian HMM
    print("fitting to HMM and decoding ...")
    model = GaussianHMM(n_components=4, covariance_type="diag",
                        n_iter=2000).fit(X_train[:, 0:5], length_train)
    hidden_states = model.predict(X_test[:, 0:5], length_test)
    print("done")

    # Show the first 80 decoded states, 20 per line.
    for start in range(0, 80, 20):
        print(hidden_states[start:start + 20])

    # Print trained parameters
    print("Transition matrix")
    print(model.transmat_)
    print("Start Prob")
    print(model.startprob_)
    print("Means and vars of each hidden state")
    for i in range(model.n_components):
        print("{0}th hidden state".format(i))
        print("mean = {0}".format(model.means_[i]))
        print("var = {0}".format(np.diag(model.covars_[i])))

    # Full decoded sequence as a column vector.
    print(np.array(hidden_states).reshape((sum(length_test), 1)))
def fit(self):
    """Fit one Gaussian HMM on all demonstrations and segment each of them.

    The demonstrations are stacked into a single observation matrix (with
    per-demonstration lengths), one ``GaussianHMM`` is fitted on it, and each
    demonstration is decoded and passed to ``self.findTransitions``.
    Results are stored in ``self.segmentation`` and ``self.model`` (the same
    fitted model object is stored once per demonstration).

    Raises:
        IndexError: if ``self._demonstrations`` is empty.
    """
    if self.verbose:
        print("[Clustering] Clearing old model and segmentation")

    self.segmentation = []
    self.model = []

    demos = self._demonstrations
    all_demos = demos[0]  # IndexError on an empty list, as before
    lens = [np.shape(d)[0] for d in demos]
    if len(demos) > 1:
        # One concatenate instead of re-copying the growing array per demo.
        all_demos = np.concatenate(demos)

    g = GaussianHMM(n_components=self.n_components)
    g.fit(all_demos, lens)

    new_segments = []
    new_model = []
    for d in demos:
        new_segments.append(self.findTransitions(g.predict(d)))
        new_model.append(g)

    self.segmentation = new_segments
    self.model = new_model
def main(args):
    """Fit a 3-state HMM to the diff rows and write decoded states as TSV.

    Reads ``args.diffFile`` through ``loadDiffRows`` and writes one
    ``exp / bin / reads / state`` line per observation to ``args.outFile``.
    The experiment label is the output file name up to its first underscore.
    """
    x, X = loadDiffRows(args.diffFile)

    model = GaussianHMM(n_components=3, covariance_type="diag",
                        n_iter=100000000000)
    # Left-to-right starting topology: 0 -> 1 -> 2, state 2 absorbing.
    model.transmat_ = numpy.array([[0.5, 0.5, 0.0],
                                   [0.0, 0.5, 0.5],
                                   [0.0, 0.0, 1.0]])
    model.fit(X)
    print(model.transmat_)

    # Force the forbidden transitions back to zero after fitting.
    # NOTE(review): rows are not renormalised afterwards - confirm intended.
    for row, col in ((0, 2), (1, 0), (2, 0), (2, 1)):
        model.transmat_[row][col] = 0.

    exp = args.outFile.split('/')[-1].split('_')[0]
    with open(args.outFile, 'w') as fout:
        print('exp\tbin\treads\tstate', file=fout)
        for seq in X:
            hiddenStates = model.predict(seq)
            for idx, (r, h) in enumerate(zip(x, hiddenStates)):
                print('\t'.join([exp, str(idx), str(r), str(h)]), file=fout)
class HMM:
    """Small wrapper around a diagonal-covariance ``GaussianHMM`` for 1-D data."""

    # Every attribute the methods assign must be declared here: with only
    # "model" listed, train() failed on new-style classes at ``self.data = ...``.
    __slots__ = ["model", "data", "hidden_states"]

    def __init__(self):
        pass

    def draw(self, data):
        """Plot ``data`` against its index as a red line."""
        figure()
        plot(range(len(data)), data, alpha=0.8, color='red')
        show()

    def train(self, data, n_components):
        """Fit the model on ``data`` and store the decoded state sequence.

        ``data`` is reshaped to a single column; the fitted model, the raw
        data and the decoded states are kept on the instance.
        """
        # Wrap in a 1-tuple so tuple/array inputs are formatted as one value.
        print("Training Data: %s" % (data,))
        self.data = data
        self.model = GaussianHMM(n_components, algorithm='viterbi',
                                 covariance_type='diag')
        X = np.reshape(data, (len(data), 1))
        self.model = self.model.fit([X])
        self.hidden_states = self.model.predict(X)
        # The original format string had no placeholder, so the states were
        # silently dropped from the message.
        print("Sequence of States: %s" % (self.hidden_states,))

    def eval(self, obs):
        """Print the model score of ``obs`` (reshaped to one column)."""
        print("Testing Data: %s" % (obs,))
        X = np.reshape(obs, (len(obs), 1))
        print("Eval: %s" % str(self.model.score(X)))

    def plot(self):
        """Scatter the training data, one series per decoded hidden state."""
        fig = figure(facecolor="white")
        ax = fig.add_subplot(111)
        positions = np.array(range(len(self.data)))
        values = np.array(self.data)
        for i in range(self.model.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot(positions[idx], values[idx], '.',
                    label="State %d" % (i + 1))
        ax.legend()
        show()
class HmmClassifier():
    """Pick the reference sequence whose fitted HMM best explains an input."""

    def __init__(self, referenceSeqs, inputSeq):
        self.referenceSeqs = referenceSeqs
        self.inputSeq = inputSeq

        # feel free to change this model
        self.model = GaussianHMM(n_components=2, covariance_type="full",
                                 n_iter=2000)

    def predict(self):
        """Return the index of the reference sequence with the highest score.

        The shared model is re-fitted on each reference sequence in turn and
        the input sequence is scored under it.

        Raises:
            ValueError: if there are no reference sequences.
        """
        probs = []
        for referenceSeq in self.referenceSeqs:
            self.model.fit(referenceSeq)
            # Only the score is needed; decoding the reference sequence was
            # computed and thrown away in the earlier version.
            probs.append(self.model.score(self.inputSeq))

        if not probs:
            raise ValueError("referenceSeqs is empty; nothing to compare against")

        # return the index of the max prob
        return probs.index(max(probs))
def calculate_weights(self, date, amount):
    """Return ``[cash, w_1, ..., w_n]`` for the assets in ``self.asset_codes``.

    On the first call the closing-price history of every asset is loaded for
    all trading dates between ``self.start_date`` and ``date``; later calls
    only append the close for ``date``.  For each asset a 2-state HMM is
    fitted on the price differences; the asset weight is the state-1
    self-transition probability when the last decoded state is 1, else 0.
    Cash is ``amount`` minus weight times latest close, summed over assets.
    """
    if self.stacked == False:
        # First call: collect the training dates, then the price history.
        for elements in self.tradingDates:
            if elements.get('dt') >= self.start_date and elements.get('dt') <= date:
                self.trainingDates.append(elements['dt'])

        for assetCode in self.asset_codes:
            self.historical_Data[assetCode] = [
                StockData.objects.filter(dt=each_date, ticker=assetCode)
                .values("price_close")[0]['price_close']
                for each_date in self.trainingDates
            ]
        self.stacked = True
    else:
        # Later calls: extend each history by the close for `date`.
        for assetCode in self.asset_codes:
            latest_close = StockData.objects.filter(
                dt=date, ticker=assetCode).values("price_close")[0]['price_close']
            self.historical_Data[assetCode].append(latest_close)

    target = {'money': amount}
    for assetCode in self.asset_codes:
        close_v = np.array(self.historical_Data[assetCode])
        X = np.column_stack([np.diff(close_v)])
        model = GaussianHMM(n_components=2, covariance_type="diag",
                            n_iter=1000).fit(X)
        hidden_states = model.predict(X)

        stableProb = model.transmat_[1][1] if hidden_states[-1] == 1 else 0

        target[assetCode] = stableProb
        target['money'] -= stableProb * close_v[-1]

    self.weight = [target['money']] + [target[assetCode]
                                       for assetCode in self.asset_codes]
    return self.weight
def hmmtest(trade_data, test_data):
    """Fit a 4-state full-covariance HMM on ``test_data`` and plot its states.

    Prints the mean and variance of every hidden state, hands the model, the
    decoded states and ``trade_data`` to ``plotHmmState`` and returns the
    fitted model.
    """
    # pack diff and volume for training
    # delete record containing infinity
    # NOTE(review): the filtered frame is overwritten on the next line, so
    # infinite rows are NOT removed before fitting - confirm this is intended.
    X = test_data[test_data['Strategy_Gross_Return_RDP_5'] != float("inf")]
    X = test_data

    # Run Gaussian HMM: make an instance and execute fit
    model = GaussianHMM(n_components=4,
                        covariance_type='full',
                        n_iter=1000).fit(X)

    # predict the optimal sequence of internal hidden state
    hidden_states = model.predict(X)

    # print trained parameters and plot
    print("means and vars of each hidden state")
    for state in range(model.n_components):
        print("%dth hidden state" % state)
        print("mean = ", model.means_[state])
        print("var = ", np.diag(model.covars_[state]))

    plotHmmState(model, hidden_states, trade_data)
    return model
def _zero_non_finite(values):
    """Return ``values`` as a float array with inf, -inf and NaN set to 0."""
    values = np.asarray(values, dtype=float)
    return np.where(np.isfinite(values), values, 0)


def _date_window(frame, start, end):
    """Return the rows of ``frame`` whose ``update_date`` lies in [start, end]."""
    dates = frame['update_date']
    return frame.loc[(dates >= start) & (dates <= end), :]


def _log_features(frame):
    """Return (1-day log return, 5-day log return, log high-low range).

    All three arrays start at row 5 of ``frame`` and have non-finite values
    replaced by 0.
    """
    log_close = np.log(np.array(frame['close']))
    r_1 = _zero_non_finite((log_close[1:] - log_close[:-1])[4:])
    r_5 = _zero_non_finite(log_close[5:] - log_close[:-5])
    r_range = _zero_non_finite(
        (np.log(np.array(frame['high'])) - np.log(np.array(frame['low'])))[5:])
    return r_1, r_5, r_range


def _state_sharpes(states, next_returns, n_states):
    """Map each state to the annualised Sharpe-style ratio of its next-day returns."""
    sharpes = {}
    for i in range(n_states):
        in_state = (states == i)
        sharpes[i] = (np.mean(next_returns[in_state]) * 252 - 0.03) / (
            np.std(next_returns[in_state]) * np.sqrt(252))
    return sharpes


def hmm_weight(df, data_raw, day, n_components, plot=False):
    """Fit an HMM on the training window and label the test window.

    ``train_test(day, df)`` supplies the train/test date bounds.  Features
    are the 1-day log return, 5-day log return and log high-low range, all
    taken from row 5 of each window onwards.  Each hidden state is scored by
    the annualised Sharpe-style ratio (0.03 risk-free rate, 252 periods) of
    the next-day returns observed while in that state.

    Args:
        df: frame with ``update_date, open, high, low, close`` used for
            features.
        data_raw: frame with the same columns, used only for plotting prices.
        day: passed through to ``train_test``.
        n_components: number of hidden states.
        plot: when true, show the diagnostic matplotlib/seaborn figures.

    Returns:
        ``(prediction, highest, lowest)`` where ``prediction`` is a frame
        with ``update_date``, ``state`` and ``expected_sharpe`` for the test
        window and ``highest`` / ``lowest`` are ``(state, sharpe)`` pairs.
        ``(None, None, None)`` when a window is empty, has fewer rows than
        ``n_components``, or no state has a non-NaN score.
    """
    tr_start, tr_end, te_start, te_end = train_test(day, df)
    col_list = ['update_date', 'open', 'high', 'low', 'close']
    df = df.loc[:, col_list].dropna(axis=0)
    data_raw = data_raw.loc[:, col_list].dropna(axis=0)

    train_df = _date_window(df, tr_start, tr_end)
    test_df = _date_window(df, te_start, te_end)
    train_close = _date_window(data_raw, tr_start, tr_end)
    test_close = _date_window(data_raw, te_start, te_end)

    if len(train_df) == 0 or len(test_df) == 0:
        return None, None, None

    r_1, r_5, r_range = _log_features(train_df)
    # Return realised on the day AFTER each observation (0 for the last row).
    r_1_no_lag = np.append(r_1[1:], 0)
    date_list = train_df['update_date'][5:]

    r_1_test, r_5_test, r_range_test = _log_features(test_df)
    date_list_test = test_df['update_date'][5:]

    X = np.column_stack([r_1, r_5, r_range])
    X_test = np.column_stack([r_1_test, r_5_test, r_range_test])
    if X.shape[0] < n_components or X_test.shape[0] < n_components:
        return None, None, None

    hmm = GaussianHMM(n_components=n_components,
                      covariance_type='diag',
                      n_iter=2000).fit(X)
    latent_states_sequence_train = hmm.predict(X)
    mean_return_dict = _state_sharpes(latent_states_sequence_train,
                                      r_1_no_lag, hmm.n_components)

    if plot:
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.set_style('white')
        plt.figure(figsize=(15, 8))
        # The state masks start at row 5, so the prices must too.
        # NOTE(review): assumes data_raw has the same rows as df in this window.
        train_prices = train_close['close'][5:]
        for i in range(hmm.n_components):
            state = (latent_states_sequence_train == i)
            plt.plot(date_list[state],
                     train_prices[state],
                     'o',
                     label='latent state %d: %s' % (i, mean_return_dict[i]),
                     lw=5)
        plt.legend()
        plt.grid(1)
        plt.show()

    latent_states_sequence_test = hmm.predict(X_test)

    pair_sorted = sorted(
        [item for item in mean_return_dict.items() if not np.isnan(item[1])],
        key=lambda item: item[1])
    if not pair_sorted:
        # Every state scored NaN: no ranking is possible.
        return None, None, None
    highest = pair_sorted[-1]
    lowest = pair_sorted[0]

    prediction = pd.DataFrame()
    prediction['update_date'] = date_list_test
    prediction['state'] = latent_states_sequence_test
    prediction['expected_sharpe'] = prediction['state'].apply(
        lambda x: mean_return_dict[x])

    if plot:
        sns.set_style('white')
        plt.figure(figsize=(8, 4))
        test_prices = test_close['close'][5:]
        for i in range(hmm.n_components):
            state = (latent_states_sequence_test == i)
            plt.plot(date_list_test[state],
                     test_prices[state],
                     'o',
                     label='latent state %d: %s' % (i, mean_return_dict[i]),
                     lw=5)
        plt.grid(1)
        plt.legend()
        plt.show()

        sns.set_style('white')
        plt.figure(figsize=(15, 10))
        new_frame = copy.deepcopy(prediction)
        new_frame.index = [new_frame['update_date']]
        new_frame['expected_return'] = new_frame['expected_sharpe'].apply(
            lambda x: 30 if x > 0 else -30)
        test_close.index = [test_close['update_date']]
        # NOTE(review): 420 is a hard-coded price offset for this chart only.
        test_close['close'] = test_close['close'] - 420
        test_close = test_close[np.min(new_frame['update_date']):np.max(
            new_frame['update_date'])]
        plt.plot(test_close['close'], 'o-', color='red')
        plt.bar(new_frame.index,
                new_frame['expected_return'],
                align='edge',
                alpha=0.5,
                color='yellow')
        plt.show()

    return prediction, highest, lowest
user_id_list.append(line) length = int(file.readline()) review_time = list() for i in range(length): info = file.readline().split('\t') review_time.append( datetime.datetime(int(info[3][:4]), int(info[3][5:7]), int(info[3][8:10]))) X.append(interval(review_time)) lengths.append(length - 1) file.readline() X = np.concatenate(X) warnings.filterwarnings("ignore") model = GaussianHMM(n_components=20, n_iter=10000, tol=1, verbose=True) model.fit(X, lengths) if model.monitor_.converged: print(model.transmat_) print(model.means_) print(model.covars_) hidden_state = model.predict(X, lengths) start = 0 with open("hidden_states.txt", 'w') as file: for i in range(len(user_id_list)): file.write(user_id_list[i]) for j in range(lengths[i]): file.write(str(hidden_state[start + j]) + '\t') start += lengths[i] file.write('\n\n')
mus = np.flipud(mus) sigmas = np.flipud(sigmas) P = np.fliplr(np.flipud(P)) hidden_states = 1 - hidden_states return hidden_states, mus, sigmas, P, logProb, samples ''' # %% Q = data.iloc[10, 6] # hidden_states, mus, sigmas, P, logProb, samples = fitHMM(Q, 100) model = GaussianHMM(n_components=4, n_iter=500).fit(np.reshape(Q, [len(Q), 1])) hidden_states = model.predict(np.reshape(Q, [len(Q), 1])) # find parameters of Gaussian HMM mus1 = np.array(model.means_) sigmas = np.array( np.sqrt( np.array([ np.diag(model.covars_[0]), np.diag(model.covars_[1]), np.diag(model.covars_[2]), np.diag(model.covars_[3]) ]))) P = np.array(model.transmat_) # %% print(model.covars_)
def hmmmodel(seq):
    """Fit a 2-state Gaussian HMM on ``seq``; return ``(model, decoded states)``."""
    fitted = GaussianHMM(n_components=2, n_iter=1000)
    fitted.fit(seq)
    return fitted, fitted.predict(seq)
start = datetime.datetime(2013, 1, 1)
# pd.datetime was only an alias of datetime.datetime and is no longer
# available in current pandas releases; use the stdlib class directly.
end = datetime.datetime.today()
df = web.DataReader("GOOGL", 'google', start, end)
datestart = '20130101'
dateend = '20160101'
# dates, close_v, volume_v, high_v, open_v, low_v = get_value_by_dates(df, datestart, dateend)
# X = np.column_stack([close_v, volume_v, high_v, open_v, low_v])
X, dates, close_v, volume_v, high_v, open_v, low_v = get_value_by_dates(
    df, datestart, dateend)

# Fit a 100-state tied-covariance HMM (only the means are initialised by
# hmmlearn, per init_params='m') and decode the training data.
model = GaussianHMM(n_components=100,
                    covariance_type="tied",
                    n_iter=100,
                    init_params='m',
                    verbose=True).fit(X)
hidden_states = model.predict(X)
print(hidden_states)

# print("Transition matrix")
# print(model.transmat_)
# print()
print("Means and vars of each hidden state")
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()
# fig, axs = plt.subplots(model.n_components, sharex=True, sharey=True)
# colours = cm.rainbow(np.linspace(0, 1, model.n_components))
def pain_state(path, hmm_train_days=0, n_components=3):
    """Annotate a price CSV in place with an HMM-derived ``state`` column.

    Each data row is read as comma-separated fields where fields 1..5 are
    open, high, low, close and volume.  A trailing ``state`` column is added
    (initialised to ``0``) unless the header already ends with ``state``.

    Rows are first consumed as warm-up until ``hmm_train_days + 4`` traded
    bars (volume > 0) are collected; with ``hmm_train_days == 0`` every row
    is warm-up and no model is fitted.  For each later traded bar a
    ``GaussianHMM`` is fitted on the last ``hmm_train_days`` bars and the
    bar's state is written as the 1-based rank of its decoded state, ordered
    by the state's mean 5-bar log return.

    The file is rewritten only after all rows have been processed, so a
    failure while fitting leaves the original file untouched.

    Args:
        path: CSV file to read and overwrite.
        hmm_train_days: number of bars in each training window (0 disables
            the HMM).
        n_components: number of hidden states.
    """
    with open(path, 'r') as f:
        # rstrip('\n') instead of [:-1]: a final line without a trailing
        # newline must not lose its last character.
        titles = f.readline().rstrip('\n').split(',')
        need_to_append = titles[-1] != 'state'
        if need_to_append:
            titles.append('state')
        datas = []
        for raw in f:
            fields = raw.rstrip('\n').split(',')
            if need_to_append:
                fields.append('0')
            datas.append(fields)

    # high/low are collected alongside the others but are not model features.
    open_price = []
    high_price = []
    low_price = []
    close_price = []
    volume = []

    def record_bar(fields):
        """Append the bar to the history if it traded; return whether it did."""
        v = float(fields[5])
        if not v > 0:
            return False
        open_price.append(float(fields[1]))
        high_price.append(float(fields[2]))
        low_price.append(float(fields[3]))
        close_price.append(float(fields[4]))
        volume.append(v)
        return True

    min_train_days = hmm_train_days + 5
    index = 0

    # Warm-up: gather enough history for the 5-bar look-back; rows keep
    # their existing (or default '0') state.
    while index < len(datas) and (len(close_price) < min_train_days - 1
                                  or hmm_train_days == 0):
        record_bar(datas[index])
        index += 1

    while index < len(datas):
        if record_bar(datas[index]):
            logRet_2 = np.log(np.array(
                close_price[-hmm_train_days:])) - np.log(
                    np.array(open_price[-hmm_train_days - 2:-2]))
            logRet_5 = np.log(np.array(
                close_price[-hmm_train_days:])) - np.log(
                    np.array(close_price[-hmm_train_days - 5:-5]))
            logVol_5 = np.log(np.array(volume[-hmm_train_days:])) - np.log(
                np.array(volume[-hmm_train_days - 5:-5]))
            A = np.column_stack([logVol_5, logRet_5, logRet_2])
            model = GaussianHMM(n_components=n_components,
                                covariance_type='full',
                                n_iter=16).fit(A)

            # Rank states by their mean of column 1 (logRet_5) so labels are
            # comparable between fits; report the latest bar's rank (1-based).
            means = np.array([ele[1] for ele in model.means_])
            ids = np.argsort(means)
            state = model.predict(A)[-1]
            for i in range(n_components):
                if state == ids[i]:
                    state = i + 1
                    break
            datas[index][-1] = "%d" % state
        index += 1

    with open(path, 'w') as f:
        f.write('%s\n' % (','.join(titles)))
        for fields in datas:
            f.write('%s\n' % (','.join(fields)))
close_v = data['close'].values
volume = data['volume'].values
dates = np.arange(data.shape[0])

# Raw closing prices.
fig1 = plt.figure()
plt.plot(close_v, color='blue')
plt.show()
fig1.savefig('stocks.svg')

# Pre-process the data: work on day-over-day differences, so drop the
# first row of every aligned series.
diff = np.diff(close_v)
dates, close_v, volume = dates[1:], close_v[1:], volume[1:]
x = np.column_stack([diff, volume])
diff = diff.reshape(-1, 1)  # 2-D matrix (one column)

# n_components: number of hidden states; n_iter: number of iterations
model = GaussianHMM(n_components=2, n_iter=1000)
model.fit(diff)
hidden_states = model.predict(diff)

# Draw each price segment in the colour of the state at its left end.
fig2 = plt.figure()
colors = ['yellow', 'blue']
for j in range(len(close_v) - 1):
    i = hidden_states[j]
    if 0 <= i < model.n_components:
        plt.plot([dates[j], dates[j + 1]],
                 [close_v[j], close_v[j + 1]],
                 color=colors[i])
plt.show()
fig2.savefig('hidden_state.svg')  # split into ranging vs. sharp rise/fall
# +
obs = dx.reshape(-1, 1)
hmm = GaussianHMM(n_components=2,
                  means_prior=np.zeros((1, 1)),
                  means_weight=1e10).fit(obs)

# rearrange the volatility from small to large
idx = np.argsort(hmm.covars_.flatten())
sigma2 = hmm.covars_.flatten()[idx]
p = hmm.transmat_[np.ix_(idx, idx)]  # transition matrix
# -

# ## [Step 3](https://www.arpm.co/lab/redirect.php?permalink=s_hidden_markov_model_stocks-implementation-step03): Compute the hidden status

z_ = hmm.predict(obs)
z = z_.copy()
for s in (0, 1):
    z[z_ == s] = idx[s]

# ## Plots

# +
plt.style.use('arpm')

# Blank out the samples that belong to the other regime in each series.
panic = dx.copy()
calm = dx.copy()
panic[z == 0] = np.nan
calm[z == 1] = np.nan

fig = plt.figure()
print(oddsr1)

#7. Evaluate the effect of the online channel and billpay on customer's retention using a hidden Markov model (HMM) with the variables 9Online, 9Billpay, 0Online, 0Billpay and the new variable Retain. Retain takes a value of 0 when 0Profit has a missing observation and 1 otherwise.
#print pd.value_counts(df['0Online'].isnull())
#print pd.value_counts(df['0Billpay'].isnull())
# Missing 2000 values are treated as 0 (not online / no billpay).
df['0OnlineNA'] = np.where(df['0Online'].isnull(), 0, df['0Online'])
df['0BillpayNA'] = np.where(df['0Billpay'].isnull(), 0, df['0Billpay'])
df.head()

from hmmlearn.hmm import GaussianHMM
group = df[['9Online', '9Billpay', '0OnlineNA', '0BillpayNA', 'Retain']]
model = GaussianHMM(n_components=4, covariance_type="diag").fit(group)
hidden_states = model.predict(group)

# Single-argument prints: under Python 2, print("a", b) shows a tuple and
# print() shows "()".
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = {0}".format(model.means_[i]))
    print("var = {0}".format(np.diag(model.covars_[i])))
    print("")

#hdf=pd.DataFrame(hidden_states)
#hdf.describe()
#verifying the meaning of the hidden states

#8. Build a transition matrix (online, billpay) from 1999 to 2000 from those different customers' states:those that were online, offline without electronic billpay, online with electronic billpay for 2000, customers who left the bank. Explain the billpay effect on customers' retention.
print("Transition matrix")
print(model.transmat_)
print("")
variances.append(np.var(dists)) return variances if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--features-path", type=str) parser.add_argument("--save-path", type=str, default=None) fileConfig('logging_config.ini') args = parser.parse_args() save_path = Path(args.save_path) if not save_path.parent.exists(): save_path.parent.mkdir() features_path = Path(args.features_path) X = np.load(str(features_path)) hmm_n_clusters = 2 hmm = GaussianHMM(n_components=hmm_n_clusters, covariance_type="diag") hmm.fit(X) hmm_preds = hmm.predict(X) variances = get_cluster_variances(X, hmm_preds, hmm.means_) court_cluster_id = np.argmin(variances) mask = hmm_preds == court_cluster_id mask = [{'action': int(i)} for i in mask] utils.write_json_lines(mask, save_path) # np.save(str(save_path), mask)
def test_hmm(self):
    """Fit a 6-state HMM on index features and plot the close per state.

    Features (all taken from row 5 onwards): 1-day and 5-day log returns of
    the close, log high-low range, turnover volume and the 1-day log change
    of the summed XSHE + XSHG ``tradeVal``.
    """
    sns.set_style('white')

    beginDate = '20100401'
    endDate = '20160317'
    index_fields = [
        'tradeDate', 'closeIndex', 'lowestIndex', 'highestIndex',
        'turnoverVol'
    ]
    data = DataAPI.MktIdxdGet(ticker='000001',
                              beginDate=beginDate,
                              endDate=endDate,
                              field=index_fields,
                              pandas="1")
    data1 = DataAPI.FstTotalGet(exchangeCD=u"XSHE",
                                beginDate=beginDate,
                                endDate=endDate,
                                field=['tradeVal'],
                                pandas="1")
    data2 = DataAPI.FstTotalGet(exchangeCD=u"XSHG",
                                beginDate=beginDate,
                                endDate=endDate,
                                field=['tradeVal'],
                                pandas="1")
    tradeVal = data1 + data2

    tradeDate = pd.to_datetime(data['tradeDate'][5:])
    volume = data['turnoverVol'][5:]
    closeIndex = data['closeIndex']

    log_high = np.log(np.array(data['highestIndex']))
    log_low = np.log(np.array(data['lowestIndex']))
    deltaIndex = (log_high - log_low)[5:]

    logReturn1 = np.array(np.diff(np.log(closeIndex)))[4:]
    logReturn5 = np.log(np.array(closeIndex[5:])) - np.log(
        np.array(closeIndex[:-5]))
    logReturnFst = np.array(np.diff(np.log(tradeVal['tradeVal'])))[4:]
    closeIndex = closeIndex[5:]

    X = np.column_stack(
        [logReturn1, logReturn5, deltaIndex, volume, logReturnFst])

    # Make an HMM instance and execute fit
    model = GaussianHMM(n_components=6, covariance_type="diag",
                        n_iter=1000).fit([X])
    # Predict the optimal sequence of internal hidden state
    hidden_states = model.predict(X)

    plt.figure(figsize=(15, 8))
    for state in range(model.n_components):
        in_state = (hidden_states == state)
        plt.plot_date(tradeDate[in_state],
                      closeIndex[in_state],
                      '.',
                      label='%dth hidden state' % state,
                      lw=1)
        plt.legend()
        plt.grid(1)
# 3: daily log high-low spread, from row 5 onwards
deltaIndex = (np.log(np.array(data['highestIndex'])) -
              np.log(np.array(data['lowestIndex'])))[5:]
# 4: 1-day log return, from row 5 onwards
logReturn1 = np.array(np.diff(np.log(closeIndex)))[4:]
# 5-day log return
logReturn5 = np.log(np.array(closeIndex[5:])) - np.log(
    np.array(closeIndex[:-5]))
logReturnFst = np.array(np.diff(np.log(tradeVal['tradeVal'])))[4:]
closeIndex = closeIndex[5:]

# Stack the 1-D arrays into one 2-D feature matrix
X = np.column_stack(
    [logReturn1, logReturn5, deltaIndex, volume, logReturnFst])

# Make an HMM instance and execute fit
model = GaussianHMM(n_components=3, covariance_type="diag",
                    n_iter=1000).fit([X])
# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)
print(hidden_states)

res = pd.DataFrame({
    'tradeDate': tradeDate,
    'logReturn1': logReturn1,
    'logReturn5': logReturn5,
    'volume': volume,
    'hidden_states': hidden_states
}).set_index('tradeDate')

for i in range(model.n_components):
    # Shift the state indicator by one day: buy on the day after the state
    # is observed.
    idx = np.append(0, (hidden_states == i)[:-1])
    # fast factor backtest
    df = res.logReturn1
    ret_col = 'sig_ret%s' % i
    res[ret_col] = df.multiply(idx, axis=0)
    res['sig_cumret%s' % i] = np.exp(res[ret_col].cumsum())
def train_a_stock(key, num_hidden_states):
    """Walk-forward HMM prediction for one symbol, then plot prediction error.

    For up to 40 steps a ``GaussianHMM`` is fitted on the training set built
    by ``stock.build_training`` up to the current end date; the most likely
    next state (given the last decoded state) supplies the predicted
    features, and the end date is advanced to the next date in
    ``train_df.nextdata``.  Predictions are merged with the real data on
    ``date`` and plotted.

    Args:
        key: symbol to train on.
        num_hidden_states: number of HMM states.

    Returns:
        The result dict of the last step that got as far as building one, or
        ``None`` if no step did.
    """
    stock = StockDb()
    stock.df['fracchange'] = (stock.df['close'] -
                              stock.df['open']) / stock.df['open']
    stock.df['fraclow'] = (stock.df['low'] -
                           stock.df['open']) / stock.df['open']
    stock.df['frachigh'] = (stock.df['high'] -
                            stock.df['open']) / stock.df['open']
    stock.df['frachighlow'] = (stock.df['high'] -
                               stock.df['low']) / stock.df['low']
    stock.df['delta-volume'] = stock.df['volume'].diff().fillna(0)
    stock.df['delta-open'] = stock.df['open'].diff().fillna(0)
    stock.df['delta-close'] = stock.df['close'].diff().fillna(0)
    stock.df['delta-closeopen'] = stock.df['open'] - stock.df['close'].shift(
        -1).fillna(0)

    startdate = datetime.datetime(2018, 3, 6, 0, 0)
    enddate = datetime.datetime(2018, 4, 2, 0, 0)
    nbpredict = 40
    features = ['delta-closeopen', 'fracchange', 'delta-volume']
    features_predict = ['date', 'open', 'low', 'high', 'close']
    predict_df = pd.DataFrame(columns=features_predict)

    # Pre-bound so a failure in the very first step cannot cause a NameError
    # below or at the final return.
    train_df = None
    result = None
    for _ in range(nbpredict):
        try:
            train_df = stock.build_training(key, startdate, enddate, features)
            X, lengths = train_df.seq_len_dict(key)
            model = GaussianHMM(n_components=num_hidden_states,
                                n_iter=1000).fit(X, lengths)
            logL = model.score(X, lengths)
            state_sequence = model.predict(X, lengths)

            # Distribution over the next state given the last decoded one.
            prob_next_step = model.transmat_[state_sequence[-1], :]
            # Use the state that actually has the reported maximum
            # probability (previously the first decoded state was used).
            state_most_probable = int(prob_next_step.argmax())
            state_feaures = model.means_[state_most_probable]

            result = {}
            result['date'] = train_df.nextdata['date'].iloc[-2]
            result['logL'] = logL
            result['nbState'] = model.n_components
            result['state_most_probable'] = state_most_probable
            result['prob_state_most_probable'] = max(prob_next_step)
            result['open'] = train_df.nextdata['close'].iloc[
                -1] + state_feaures[features.index('delta-closeopen')]
            result['close'] = result['open'] * (
                1 + state_feaures[features.index('fracchange')])
            result['fracchange'] = state_feaures[features.index('fracchange')]
            result['delta-closeopen'] = state_feaures[features.index(
                'delta-closeopen')]
            # DataFrame.append no longer exists in pandas 2; concat is the
            # equivalent.
            predict_df = pd.concat([predict_df, pd.DataFrame([result])],
                                   ignore_index=True)
        except Exception:
            # Best effort: a failed step is reported and skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("err")
        if train_df is not None:
            enddate = train_df.nextdata['date'].iloc[-2]

    real_df = stock.df[stock.df['symbol'] == key]
    res_df = pd.merge(real_df, predict_df, on='date')
    res_df['err'] = (res_df['fracchange_x'] -
                     res_df['fracchange_y']) / res_df['fracchange_x']
    print("----------------------------------------")

    # ========================
    # Plot the data
    # ========================
    fig, axes = plt.subplots(nrows=1, ncols=2)
    res_df.plot(kind='line',
                color='Blue',
                x='date',
                y='fracchange_x',
                ax=axes[0],
                title='fracchange')
    res_df.plot(kind='line',
                color='red',
                x='date',
                y='fracchange_y',
                ax=axes[0])
    res_df.plot(kind='line',
                color='Blue',
                x='date',
                y='err',
                ax=axes[1],
                title='fraclow')
    #res_df.plot(kind='line', color='red', x='date', y='fraclow_y', ax=axes[1])
    plt.show()
    print(res_df[['date', 'fracchange_x', 'fracchange_y']])
    return result
import numpy as np
from numpy import genfromtxt
from hmmlearn.hmm import GaussianHMM

label_data = genfromtxt('Label', delimiter=',')
observation_data = genfromtxt('Observations.csv', delimiter=',')

# One observation list per run; each label row is (run, step, x, y) with
# 1-based run and step numbers.
runs = [[] for _ in range(6000)]
for row in label_data:  # `row`, not `tuple`: do not shadow the builtin
    run = int(row[0]) - 1
    step = int(row[1]) - 1
    angle = observation_data[run][step]
    x = row[2]
    y = row[3]
    runs[run].append([x, y, angle])

# Train on the first run, decode the second.
model2 = GaussianHMM(n_components=4, covariance_type="diag",
                     n_iter=1000).fit(runs[0])
preds = model2.predict(runs[1])
print(preds)
date_list = train_data['update_date'][5:]

# Test-window features, all aligned to row 5 onwards.
r_5_test = np.array(np.log(test_data['close'][5:])) - np.array(
    np.log(test_data['close'][:-5]))
r_1_test = (np.array(np.log(test_data['close'][1:])) -
            np.array(np.log(test_data['close'][:-1])))[4:]
r_range_test = (np.array(np.log(test_data['high'])) -
                np.array(np.log(test_data['low'])))[5:]
date_list_test = test_data['update_date'][5:]

X = np.column_stack([r_1, r_5, r_range])
X_test = np.column_stack([r_1_test, r_5_test, r_range_test])

hmm = GaussianHMM(n_components=13, covariance_type='diag', n_iter=5000).fit(X)
latent_states_sequence_train = hmm.predict(X)
len(latent_states_sequence_train)

sns.set_style('white')
mean_return_dict = {}
plt.figure(figsize=(15, 8))
# The state sequence starts at row 5 (like date_list), so the prices must be
# sliced the same way before the boolean mask is applied.
train_close_aligned = train_data['close'][5:]
for i in range(hmm.n_components):
    state = (latent_states_sequence_train == i)
    plt.plot(date_list[state],
             train_close_aligned[state],
             'o',
             label='latent state %d' % i,
             lw=1)
plt.legend()
plt.grid(1)
@author: mac """ #%% df = pd.read_csv('trainDemo.csv', encoding="utf-8") df.iloc[:, 1].plot() dataset_X = df.iloc[:, 1].values.reshape(1, -1).T print(dataset_X.shape) #%% from hmmlearn.hmm import GaussianHMM model = GaussianHMM(n_components=8, covariance_type="diag", n_iter=1000) model.fit(dataset_X) #%% hidden_states = model.predict(dataset_X) #%% for i in range(model.n_components): # 打印出每个隐含状态 mean = model.means_[i][0] variance = np.diag(model.covars_[i])[0] print('Hidden state: {}, Mean={:.3f}, Variance={:.3f}'.format( (i + 1), mean, variance)) #%% # 使用HMM模型生成数据 N = 2348 samples, _ = model.sample(N) plt.plot(samples[:, 0]) #%% print(samples) import numpy
# HMMMLearn #################################################################################### #################################################################################### #################################################################################### import numpy as np from hmmlearn.hmm import GaussianHMM new_x = np.asarray(x_train) n_comps = 6 model = GaussianHMM(n_comps) model.fit([new_x]) hidden_states = model.predict(new_x) ############################################################################### # print trained parameters and plot import pylab as pl from matplotlib.finance import quotes_historical_yahoo from matplotlib.dates import YearLocator, MonthLocator, DateFormatter print("Transition matrix") print(model.transmat_) print() print("means and vars of each hidden state") for i in range(n_comps):
import warnings
for _category in (DeprecationWarning, RuntimeWarning):
    warnings.filterwarnings("ignore", category=_category)

from hmmlearn.hmm import GaussianHMM
import numpy as np

# samples:
X = np.array([[-1.03573482, -1.03573482], [6.62721065, 11.62721065],
              [3.19196949, 8.19196949], [0.38798214, 0.38798214],
              [2.56845104, 7.56845104], [5.03699793, 10.03699793],
              [5.87873937, 10.87873937], [4.27000819, -1.72999181],
              [4.02692237, -1.97307763], [5.7222677, 10.7222677]])

# Training a new model over samples:
model = GaussianHMM(n_components=3, covariance_type="diag").fit(X)

# Create a new copy of the trained model:
new_model = GaussianHMM(n_components=3, covariance_type="diag")
for _attr in ("startprob_", "transmat_", "means_"):
    setattr(new_model, _attr, getattr(model, _attr))

# Inspection values: the private covariance array, the public property and
# the constructor parameters.
m = model._covars_
n = model.covars_
p = model.get_params()
# The copy is given the private `_covars_` array, not the `covars_` property.
new_model.covars_ = model._covars_

# Predict from X:
X_N = new_model.predict(X)
print(X_N)
import pylab as pl
import numpy as np
from hmmlearn.hmm import GaussianHMM
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import nyc

###############################################################################
# print trained parameters and plot
###############################################################################
new_x = np.asarray(train_set)
n_comps = 6
model = GaussianHMM(n_comps)
model.fit([new_x])
hidden_states = model.predict(new_x)

print("means and vars of each hidden state")
i = 0
while i < n_comps:
    print("%dth hidden state" % i)
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()
    i += 1

# Date tick helpers and the figure that the following code draws on.
years = YearLocator()  # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')
fig = pl.figure()
ax = fig.add_subplot(111)
ald = np.asarray(all_days)
endTime = '2018-12-1'
data = tu.get_hist_data('sh', beginTime, endTime, 'D')
high = data['high']
low = data['low']
volume = data['volume']
close = data['close'][5:]
Date = pd.to_datetime(data.index[5:])
print(Date)

logDel = (np.log(np.array(high)) - np.log(np.array(low)))[5:]
logRet1 = np.array(np.diff(np.log(close)))[5:]
# 5-day log return of the index.
# NOTE(review): `close` is already sliced from row 5, so this series starts
# 5 rows later than logDel / logVol5 - confirm the intended alignment.
logRet5 = np.log(np.array(close[5:])) - np.log(np.array(close[:-5]))
# 5-day log change of the index trading volume
logVol5 = np.log(np.array(volume[5:])) - np.log(np.array(volume[:-5]))

# `normed` is no longer accepted by matplotlib's hist; `density` is the
# replacement.
plt.hist(logVol5, 200, density=True, facecolor='green', alpha=0.75)
# plt.show()
# print(logDel)

A = np.column_stack([logDel[:100], logRet5[:100], logVol5[:100]])  # 1-D -> 2-D
print(A)
model = GaussianHMM(n_components=6, covariance_type='diag', n_iter=2000).fit(A)
hidden_states = model.predict(A)
print(hidden_states)

plt.figure(figsize=(10, 5))
sns.set_style('white')
# Only the rows that were fed to the model have a decoded state.
n_obs = len(hidden_states)
for i in range(model.n_components):
    pos = (hidden_states == i)
    # Plot every observation in state i (previously only element i was
    # plotted and the mask was ignored).
    plt.plot(Date[:n_obs][pos],
             close[:n_obs][pos],
             'o',
             label='hidden state %d' % i,
             lw=360)
plt.legend()
plt.grid(10)
plt.show()
#spx_ret = spx_ret * 1000.0 rets = np.column_stack([spx_ret]) # Create the Gaussian Hidden markov Model and fit it # to the SPY returns data, outputting a score hmm_model = GaussianHMM( n_components=3, # number of states covariance_type="full", # full covariance matrix vs diagonal n_iter=1000 # number of iterations ).fit(rets) print("Model Score:", hmm_model.score(rets)) # Plot the in sample hidden states closing values # Predict the hidden states array hidden_states = hmm_model.predict(rets) print('Percentage of hidden state 1 = %f' % (sum(hidden_states)/len(hidden_states))) print("Transition matrix") print(hmm_model.transmat_) print("Means and vars of each hidden state") for i in range(hmm_model.n_components): # 0 is down, 1 is up print("{0}th hidden state".format(i)) print("mean = ", hmm_model.means_[i]) print("var = ", np.diag(hmm_model.covars_[i])) fig, axs = plt.subplots(hmm_model.n_components, sharex=True, sharey=True) colours = cm.rainbow(np.linspace(0, 1, hmm_model.n_components)) for i, (ax, colour) in enumerate(zip(axs, colours)):
def predict_states(X, group_id, empirical_states):
    """Decode a two-state HMM over ``X`` and pad the result with a forecast.

    A two-component, diagonal-covariance ``GaussianHMM`` is fitted to ``X``
    and the most likely state sequence is decoded.  Because the 0/1 labels
    of a fitted HMM are arbitrary, the sequence is flipped when the flipped
    version is closer (sum of squared differences) to ``empirical_states``.
    The sequence is then extended with the most likely successor of the last
    decoded state until it holds ``(group_id + 1) * 10`` entries.

    Args:
        X: Observation matrix handed to the model's fit/decode/predict.
        group_id: Integer; the target length is ``(group_id + 1) * 10``.
        empirical_states: Reference 0/1 labels that can be subtracted
            element-wise from the decoded sequence.

    Returns:
        A list of 0/1 state labels.  A decoded sequence that already has at
        least the target length is returned without padding.
    """
    max_state_number = (group_id + 1) * 10
    n_components = 2
    # Number of fits to compare; the best-scoring one is kept.  The original
    # loop (range(1, 2)) performed exactly one fit, which is preserved.
    n_fits = 1

    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    best_score = None
    best_states = None
    transmat = None
    for _ in range(n_fits):
        model.fit([X])
        score = model.decode(X)[0]
        if best_score is None or best_score < score:
            best_score = score
            best_states = model.predict(X)
            transmat = model.transmat_

    # Compare the decoded labels and their flipped version with the
    # empirical states, and keep whichever is closer.
    decoded = np.asarray(best_states)
    flipped = 1 - decoded
    reference = np.array(empirical_states)
    difference = np.sum(np.power(np.subtract(decoded, reference), 2))
    difference_inver = np.sum(np.power(np.subtract(flipped, reference), 2))
    use_flipped = difference_inver < difference

    # Forecast the next state from the transition row of the LAST decoded
    # state (in the model's own labelling).  Previously row 1 was always
    # used, regardless of where the sequence actually ended.
    next_probs = transmat[int(decoded[-1])]
    future_state = 1 if next_probs[1] > next_probs[0] else 0
    if use_flipped:
        # Keep the forecast in the same labelling as the returned sequence.
        future_state = 1 - future_state

    states = (flipped if use_flipped else decoded).tolist()
    padding = [future_state] * max(0, max_state_number - len(states))
    return states + padding
def MyGaussianHMM(): from hmmlearn.hmm import GaussianHMM df = pd.read_csv( "/home/ray/Documents/suibe/2017/建模/Modeling_Preparation/dataset/SZIndex.csv", header=-1) df.head() X = np.array(df.iloc[:, 0:5]) # 一、未知模型情况下,解决问题3 model = GaussianHMM(n_components=6, covariance_type="diag", n_iter=1000) # 方差矩阵为对角阵 """ 参数解释: covariance_type: "spherical" :主对角元素均为1,其余元素为0,独立同分布 (数据不足时,难以进行参数估计) "diag" :主对角元素不为0,其余为0 (一般情况,折中) "full" :所有元素均不为0 (数据足够进行参数估计时) """ model.fit(X) print "隐含状态为: ", model.predict(X) # 列出每一天的隐含状态 print "特征数目 %s" % model.n_features print "隐状态数目 %s" % model.n_components print "起始概率 :", model.startprob_ print "隐状态转移矩阵", model.transmat_ ## 每个隐含层对应的特征概率空间假设为正态分布,则可以得到一个model.n_components行model.n_features列的均值矩阵 print "混淆矩阵:均值部分", model.means_ print "混淆矩阵:方差部分", model.covars_ ## 绘图 hidden_states = model.predict(X) tradeDate = df.iloc[:, 5].values closeIndex = df.iloc[:, 6].values plt.figure(figsize=(15, 8)) for i in range(model.n_components): idx = (hidden_states == i) plt.plot_date(pd.to_datetime(tradeDate[idx]), closeIndex[idx], '.', label='%dth hidden state' % i, lw=1) plt.legend() plt.grid(1) plt.show() # 二、已知模型情况下,解决问题1,2 ## 沿用上述模型 ### 问题1 print "某天出现该观测的概率为: %s" % np.exp(model.score(X[0])) ### 问题2 log_prob, state = model.decode(X[:10], algorithm="viterbi") print "只根据前十天,推断出最有可能的隐含状态序列为:", state ## 自己输入模型参数 ### 一个2特征,4隐状态情况 startprob = np.array([0.6, 0.3, 0.1, 0.0]) # The transition matrix, note that there are no transitions possible # between component 1 and 3 transmat = np.array([[0.7, 0.2, 0.0, 0.1], [0.3, 0.5, 0.2, 0.0], [0.0, 0.3, 0.5, 0.2], [0.2, 0.0, 0.2, 0.6]]) # The means of each component means = np.array([[0.0, 0.0], [0.0, 11.0], [9.0, 10.0], [11.0, -1.0]]) # The covariance of each component covars = .5 * np.tile(np.identity(2), (4, 1, 1)) model2 = GaussianHMM(n_components=4, covariance_type="full", n_iter=1000) model2.startprob_ = startprob model2.transmat_ = transmat model2.means_ = means model2.covars_ = covars
diff = np.diff(close_v)
# np.diff yields one element fewer than its input, so the leading entry of
# the date and close series is dropped as well.
dates, close_v = dates[1:], close_v[1:]

# Pack diff and volume for training.
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end="")

# Make an HMM instance and execute fit
model = GaussianHMM(n_components=4, covariance_type="diag", n_iter=1000)
model.fit(X)

# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print("done")

###############################################################################
# Print trained parameters and plot
print("Transition matrix")
print(model.transmat_)
print()

print("Means and vars of each hidden state")
for i, (state_mean, state_covar) in enumerate(zip(model.means_, model.covars_)):
    print("{0}th hidden state".format(i))
    print("mean = ", state_mean)
    # Only the diagonal of the state's covariance is reported.
    print("var = ", np.diag(state_covar))
    print()
# 08-09 # 09-10 # 10-11 # 12-13 # 13-14 # 14-15 "Número de estados deseados" Nc = 3 " Se entrena el HMM y se estima la serie de estados probables" wind_leap = wind.reshape(-1, 1) model = GaussianHMM(n_components=Nc, covariance_type="diag", n_iter=1000).fit(wind_leap) hidden_states = model.predict(wind_leap) " Matriz de estados, donde cada fila es un año de estados" state_matrix = np.reshape(hidden_states, (27, 120)) state_matrix = state_matrix + 1 state_matrix[state_matrix == 3] = 11 state_matrix[state_matrix == 1] = 33 # state_matrix[state_matrix == 2] = 55 state_matrix[state_matrix == 33] = 3 state_matrix[state_matrix == 11] = 1 # state_matrix[state_matrix == 55] = 5 # Dos estados if Nc == 2:
def runHmm(patient_record, date_list, group_id, empirical_states):
    """Fit a two-state HMM to a patient's records and return padded states.

    One 20-column observation row is built per entry of ``date_list`` from
    the values stored in ``patient_record[date]``.  A two-component,
    diagonal-covariance ``GaussianHMM`` is fitted to these rows and the most
    likely state sequence is decoded.  The 0/1 labels are flipped when the
    flipped sequence is closer (sum of squared differences) to
    ``empirical_states``.  The sequence is then extended with the most
    likely successor of the last decoded state until it holds
    ``(group_id + 1) * 10`` entries.

    Args:
        patient_record: Mapping from date to a mapping whose values form one
            observation row (20 values per row).
        date_list: Dates to read from ``patient_record``, in row order.
        group_id: Integer; the target length is ``(group_id + 1) * 10``.
        empirical_states: Reference 0/1 labels that can be subtracted
            element-wise from the decoded sequence.

    Returns:
        A list of 0/1 state labels.  A decoded sequence that already has at
        least the target length is returned without padding.
    """
    max_state_number = (group_id + 1) * 10

    # The matrix always has at least two rows; rows that receive no record
    # stay all-zero.  This makes the former "0 rows" / "1 row" special cases
    # unreachable, so they were dropped.
    X = np.zeros(shape=(max(len(patient_record), 2), 20))
    for index, date in enumerate(date_list):
        # .values() works on both Python 2 and 3 (unlike .iteritems()).
        X[index] = np.array(list(patient_record[date].values()))

    print("fitting to HMM and decoding ...")
    n_components = 2
    # Number of fits to compare; the best-scoring one is kept.  The original
    # loop (range(1, 2)) performed exactly one fit, which is preserved.
    n_fits = 1

    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    best_score = None
    best_states = None
    transmat = None
    for _ in range(n_fits):
        model.fit([X])
        score = model.decode(X)[0]
        if best_score is None or best_score < score:
            best_score = score
            best_states = model.predict(X)
            transmat = model.transmat_

    # Compare the decoded labels and their flipped version with the
    # empirical states, and keep whichever is closer.
    decoded = np.asarray(best_states)
    flipped = 1 - decoded
    reference = np.array(empirical_states)
    difference = np.sum(np.power(np.subtract(decoded, reference), 2))
    difference_inver = np.sum(np.power(np.subtract(flipped, reference), 2))
    use_flipped = difference_inver < difference

    # Forecast the next state from the transition row of the LAST decoded
    # state (in the model's own labelling).  Previously row 1 was always
    # used, regardless of where the sequence actually ended.
    next_probs = transmat[int(decoded[-1])]
    future_state = 1 if next_probs[1] > next_probs[0] else 0
    if use_flipped:
        # Keep the forecast in the same labelling as the returned sequence.
        future_state = 1 - future_state

    states = (flipped if use_flipped else decoded).tolist()
    padding = [future_state] * max(0, max_state_number - len(states))
    return states + padding
alpha=0.75)
plt.show()

# Observation sequences matrix
A = np.column_stack([logDel, logRet_5, logVol_5])

# Rescaled observation sequences matrix
rescaled_A = np.column_stack(
    [rescaled_boxcox_logDel, rescaled_logRet_5, rescaled_logVol_5])

# HMM modeling based on raw observation sequences
# NOTE(review): the observations are passed to fit() wrapped in a list while
# predict() receives the bare matrix -- confirm this matches the installed
# hmmlearn version's fit() signature.
model = GaussianHMM(n_components=3, covariance_type="full", n_iter=2000).fit([A])
hidden_states = model.predict(A)
# Bare expression: has no effect when run as a script.
hidden_states

# Plot the hidden states: one marker series per state, selected with a
# boolean mask over the decoded sequence.
plt.figure(figsize=(25, 18))
for i in range(model.n_components):
    pos = (hidden_states == i)
    plt.plot_date(Date[pos], close[pos], 'o', label='hidden state %d' % i, lw=2)
    # NOTE(review): "left" is not one of matplotlib's documented legend
    # location strings (e.g. "center left", "upper left") -- confirm intent.
    plt.legend(loc="left")

# Trading test according to the hidden states
# (the body of this loop lies beyond the visible fragment)
for i in range(3):
# Fragment overview (it starts inside an if/elif branch and ends inside an
# open `try:`; the original line structure was lost, so the code below is
# kept verbatim):
#   1. Pick a per-parameter fill value; when the '<parameter>_smoothed'
#      column is missing, pad-fill gaps (limit 5), fill the remainder with
#      that value and call smooth(); then pickle the frame.
#   2. Keep only rows with synced_time > 0 ns, stack the '*_smoothed'
#      columns into X and decode hidden states with THE_model.predict.
#   3. States that occur in fewer than 0.5% of the rows are set to NaN and
#      replaced by forward-fill followed by back-fill.
#      NOTE(review): the inline comment says "<1%" but the code compares
#      against 0.005 -- confirm which threshold is intended.
#   4. Build `statesdf` (state, FLY_ID, GROUP) on _fbf's index and pickle it.
# NOTE(review): np.column_stack is given a generator expression rather than
# a sequence -- confirm this is accepted by the installed NumPy.
# NOTE(review): the `print i, ...` statement is Python 2 syntax.
fillValue = 30.0 elif parameter == 'Length': fillValue = 325.0 else: fillValue = 0 if (parameter + '_smoothed') not in fbf.columns: fbf[parameter] = fbf[parameter].fillna(method='pad', limit=5).fillna(fillValue) fbf = smooth(fbf, parameter) fbf.to_pickle(directory + '/frame_by_frame_synced.pickle') #CREATE HIDDEN MARKOV MODEL _fbf = fbf.loc[fbf['synced_time'] > np.timedelta64(0,'ns')] #take only post-stimulus data X = np.column_stack(_fbf[ i +'_smoothed'] for i in parameters) state_values = pd.DataFrame(THE_model.predict(X), columns=['state']) #DISCARD CASES WHERE ONE OR MORE STATES OCCURS RARELY (<1%). DISCARD = False for i in list(set(state_values['state'])): if (len(state_values[state_values['state']==i]) / float(len(state_values)) < 0.005) & (len(state_values[state_values['state']==i]) >0): print i, len(state_values), len(state_values[state_values['state'] == i]), '\t', FLY_ID state_values.loc[state_values['state']==i, 'state'] = np.nan #DISCARD = True state_values['state'] = state_values['state'].fillna(method='pad').fillna(method='bfill') state_values = np.array(state_values['state']) statesdf = pd.DataFrame(state_values, columns=['state'], index = _fbf.index) statesdf['FLY_ID'] = FLY_ID try: statesdf['GROUP'] = GROUP statesdf.to_pickle(directory + '/states.pickle')
def predict_states(X, group_id, empirical_states):
    """Decode a two-state HMM over ``X`` and pad the result with a forecast.

    A two-component, diagonal-covariance ``GaussianHMM`` is fitted to ``X``
    and the most likely state sequence is decoded.  Because the 0/1 labels
    of a fitted HMM are arbitrary, the sequence is flipped when the flipped
    version is closer (sum of squared differences) to ``empirical_states``.
    The sequence is then extended with the most likely successor of the last
    decoded state until it holds ``(group_id + 1) * 10`` entries.

    Args:
        X: Observation matrix handed to the model's fit/decode/predict.
        group_id: Integer; the target length is ``(group_id + 1) * 10``.
        empirical_states: Reference 0/1 labels that can be subtracted
            element-wise from the decoded sequence.

    Returns:
        A list of 0/1 state labels.  A decoded sequence that already has at
        least the target length is returned without padding.
    """
    max_state_number = (group_id + 1) * 10
    n_components = 2
    # Number of fits to compare; the best-scoring one is kept.  The original
    # loop (range(1, 2)) performed exactly one fit, which is preserved.
    n_fits = 1

    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    best_score = None
    best_states = None
    transmat = None
    for _ in range(n_fits):
        model.fit([X])
        score = model.decode(X)[0]
        if best_score is None or best_score < score:
            best_score = score
            best_states = model.predict(X)
            transmat = model.transmat_

    # Compare the decoded labels and their flipped version with the
    # empirical states, and keep whichever is closer.
    decoded = np.asarray(best_states)
    flipped = 1 - decoded
    reference = np.array(empirical_states)
    difference = np.sum(np.power(np.subtract(decoded, reference), 2))
    difference_inver = np.sum(np.power(np.subtract(flipped, reference), 2))
    use_flipped = difference_inver < difference

    # Forecast the next state from the transition row of the LAST decoded
    # state (in the model's own labelling).  Previously row 1 was always
    # used, regardless of where the sequence actually ended.
    next_probs = transmat[int(decoded[-1])]
    future_state = 1 if next_probs[1] > next_probs[0] else 0
    if use_flipped:
        # Keep the forecast in the same labelling as the returned sequence.
        future_state = 1 - future_state

    states = (flipped if use_flipped else decoded).tolist()
    padding = [future_state] * max(0, max_state_number - len(states))
    return states + padding
dim_h = 5       # number of hidden states
N_train = 500   # number of leading rows used for training
n_stocks = 1
X = in_data[:N_train, :(n_stocks * 3)]
# Floor division keeps n_factors an integer on Python 3 as well; it is used
# below to build index arrays, and float indices are not valid.
n_factors = X.shape[1] // n_stocks

# Make an HMM instance and execute fit
train_ema = in_data_ema[:N_train, :]
model = GaussianHMM(n_components=dim_h, covariance_type="diag",
                    n_iter=1000).fit(train_ema)
RMSE_train = np.zeros(N_train)
ER_train = np.zeros(N_train)

# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(train_ema)
# NOTE(review): `i` is not defined in the visible fragment; presumably this
# part runs inside a loop over training rows -- confirm.
state_cur = hidden_states[i]
# model.transmat_

# Column index of the first factor of each stock.
pred_ind = np.arange(n_stocks) * n_factors
mean_cur = model.means_[state_cur, :]
mean_pred = mean_cur[pred_ind]

# Adjust the state mean with the EMA value of the current row.
prev_ema = in_data_ema[i, pred_ind]
mean_pred = rm_ema(mean_pred, prev_ema, n_ema=n_ema)

# Covariance of the current state restricted to the predicted columns.
covar_cur = model.covars_[state_cur, :]
covar_pred = covar_cur[pred_ind, :][:, pred_ind]
covar_pred = rm_ema(covar_pred, 0, n_ema=n_ema)

# Reference value: the same columns in the next row of the raw data.
y_true = in_data[(i + 1), pred_ind]
plt.show()

# Disabled experiment, kept as an inert string literal.
'''for i in range(3,30):
    km = GaussianMixture(n_components = i, covariance_type = 'diag').fit(dt1)
    bc.append(km.bic(dt1))
plt.plot(bc)
plt.show()
vec = km.predict(dt1)
plt.scatter(dt[0,:], dt[1,:], c=vec)
plt.show()
print km.bic(dt1)'''

#%%
#y1 = dt[:,700:1500]
#y2 = dt[:,1600:2000]
#y3 =np.append(y1,y2, axis=1)

# Columns 220..269 of dt are the training window; the model is given the
# transposed window.
y3 = dt[:, 220:270]
train_obs = np.transpose(y3)

md = GaussianHMM(n_components=7, n_iter=100)
md.fit(train_obs)
print(md.score(train_obs))

# Decode dt1 with the fitted model and plot the state path.
state_path = md.predict(dt1)
plt.plot(state_path)
plt.show()
#joblib.dump(md, "Clasificadores/md7.pkl")
# `.values` replaces DataFrame.as_matrix(), which newer pandas releases no
# longer provide; both yield the frame's underlying NumPy array.
data_label = activity_data.values
test_feature = feature_test.values
test_label = activity_test.values
lengths = data_feature.shape[0]

# --- Run Gaussian HMM --- #
# Written as a call so the line is valid on both Python 2 and Python 3.
print("fitting to HMM and decoding ...")

# --- Make an HMM instance and execute fit --- #
model = GaussianHMM(n_components=5, covariance_type="diag",
                    n_iter=1000).fit(data_feature)

# --- Predict the optimal sequence of internal hidden state FOR DATA CSV!--- #
# --- the following is generating figure #1, and it predicts state sequence from DATA csv --- #
hidden_states = model.predict(data_feature)
time_axis = np.arange(len(hidden_states))

# --- fancy plots of different states in HMM --- #
# One subplot and one colour per hidden state.
fig1_data, axs = plt.subplots(model.n_components, sharex=True, sharey=True)
fig1_data.suptitle('Estimated State Sequence for Training Data')
colours = cm.rainbow(np.linspace(0, 1, model.n_components))
for i, (ax, colour) in enumerate(zip(axs, colours)):
    # --- Use fancy indexing to plot data in each state --- #
    mask = hidden_states == i
    # Column 1 of the features is plotted against the sample index.
    ax.plot(time_axis[mask], data_feature[:, 1][mask], ".", c=colour)
    ax.set_title("{0}th hidden state".format(i))
    ax.grid(True)

# --- the following is generating figure #2, and it plots actual label sequence from DATA csv --- #
from hmmlearn.hmm import GaussianHMM
from matplotlib import cm, pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
import numpy as np
import pandas as pd
import seaborn as sns

client = bitmex.bitmex(test=IS_TEST, api_key=API_KEY, api_secret=API_SECRET)

# Request 1000 daily XBTUSD buckets, newest first; the first element of the
# call's result is what gets loaded into the frame.
bucketed = client.Trade.Trade_getBucketed(
    binSize='1d',
    symbol='XBTUSD',
    count=1000,
    reverse=True,
).result()[0]
prices = pd.DataFrame(bucketed)

# Index by timestamp and put the rows in ascending time order.
prices.set_index(['timestamp'], inplace=True)
prices = prices.sort_values(by='timestamp', ascending=True)

# Single-column matrix of close-to-close percentage changes; the first row
# has no predecessor, so it is set to 0.
close_changes = prices['close'].pct_change()
rets = np.column_stack([close_changes])
rets[0] = 0

hmm_model = GaussianHMM(n_components=2, covariance_type="full", n_iter=10)
hmm_model.fit(rets)
print("Model Score:", hmm_model.score(rets))

# Bare expressions: their values are discarded when run as a script.
hmm_model.predict(rets)
# NOTE(review): `rets2` is not defined in the visible fragment -- confirm
# where it comes from.
hmm_model.predict(rets2)[-1]