Beispiel #1
0
def cmodel(company, dt1, dt2, num_of_states):
    """Fit a Gaussian HMM to day-to-day close changes and estimate persistence.

    Quotes for ``company`` between ``dt1`` and ``dt2`` are fetched with
    ``quotes_historical_yahoo_ochl``.  The first difference of element 2 of
    every quote (treated here as the close value) is the single training
    feature.  The fitted model is pickled with joblib under ``./sims_final``
    (the directory is presumably expected to exist already -- TODO confirm).

    Args:
        company: Identifier passed to the quote loader; also used in the
            name of the saved model file.
        dt1: Start of the time range.
        dt2: End of the time range.
        num_of_states: Number of hidden states of the HMM.

    Returns:
        A tuple ``(expected_days, tr_mls)``.  ``tr_mls`` is the
        self-transition probability of the most likely hidden state at the
        last observation and ``expected_days`` is ``1 / (1 - tr_mls)``.
        Both are ``1`` when ``num_of_states`` is not greater than 1.
    """
    # Here we set the time range
    quotes = quotes_historical_yahoo_ochl(company, dt1, dt2)

    # Single feature: difference between consecutive close values.
    close_v = np.array([q[2] for q in quotes])
    X = np.column_stack([np.diff(close_v)])

    # Create HMM instance and fit
    model = GaussianHMM(n_components=num_of_states,
                        covariance_type="full",
                        n_iter=1000).fit(X)

    expected_days = 1
    tr_mls = 1

    if num_of_states > 1:
        # Identify the most likely last hidden state.
        try:
            hidden_probs = model.predict_proba(X)
        except Exception:
            # Fall back to diagonal covariances when the full-covariance
            # model cannot be evaluated (presumably a degenerate covariance
            # estimate -- TODO confirm which exception is expected here).
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            model = GaussianHMM(n_components=num_of_states,
                                covariance_type="diag",
                                n_iter=1000).fit(X)
            hidden_probs = model.predict_proba(X)

        mls = hidden_probs[-1].argmax()

        # Self-transition probability for the most likely last hidden state.
        tr_mls = model.transmat_[mls][mls]

        # Geometric-series formula for the expected number of days spent in
        # the current state.  NOTE: the denominator is zero when tr_mls is
        # exactly 1.
        expected_days = 1.0 / (1 - tr_mls)

    # Save the model for future use.
    fname = str(company) + "_" + str(num_of_states) + "_states_model_final.pkl"
    joblib.dump(model, os.path.join('./sims_final', fname))

    return expected_days, tr_mls
def fit_hmm(turb_series):
    """Fit a two-state Gaussian HMM to ``turb_series`` and summarise it.

    State 0 is reported as the "normal" regime and state 1 as the "event"
    regime.

    Args:
        turb_series: Observations to fit.  It is copied, passed to the HMM,
            and its ``index`` is reused for the returned frames.

    Returns:
        A tuple ``(frame, hmm_model_results)``:

        * ``frame`` -- the copied input concatenated column-wise with the
          posterior probabilities ("Event.Prob", "Normal.Prob") and the
          decoded state ("NormalorEventClass").
        * ``hmm_model_results`` -- list of: state of the first observation,
          then for state 0 and for state 1 in turn: self-transition
          probability, probability of moving to the other state, first mean
          component, and square root of the [0][0] covariance entry.
    """
    data = turb_series.copy()

    hmm_model = GaussianHMM(n_components=2,
                            covariance_type="full",
                            n_iter=1000)
    hmm_model = hmm_model.fit(data)
    states = hmm_model.predict(data)

    def _summary(state, other):
        # [persistence, transition to the other state, mean, std-dev]
        return [
            hmm_model.transmat_[state][state],
            hmm_model.transmat_[state][other],
            hmm_model.means_[state][0],
            np.sqrt(hmm_model.covars_[state])[0][0],
        ]

    hmm_model_results = [states[0]] + _summary(0, 1) + _summary(1, 0)

    state_frame = pd.DataFrame(states,
                               columns=["NormalorEventClass"],
                               index=data.index)

    # NOTE(review): the first posterior column is labelled "Event.Prob" while
    # state 0 is summarised above as the "normal" regime -- confirm that the
    # column order is intended.
    posterior_frame = pd.DataFrame(hmm_model.predict_proba(data),
                                   columns=["Event.Prob", "Normal.Prob"],
                                   index=data.index)

    combined = pd.concat([data, posterior_frame, state_frame], axis=1)
    return combined, hmm_model_results
Beispiel #3
0
    def train(self, k, train_set, valid_set):
        """Fit a Gaussian HMM on the training folds and score it on validation.

        Args:
            k: Fold identifier; only used in the file name of the saved model.
            train_set: Iterable of folds, each an iterable of
                ``(wav, fold, label)`` triples; the folds are chained.
            valid_set: Iterable of ``(wav, fold, label)`` triples.

        Side effects:
            Dumps the fitted model to ``{self.model_path}/hmm10-{k}.pkl`` and
            prints the purity score of the validation predictions.
        """
        train_wavs, train_folds, train_labels = zip(*list(chain(*train_set)))
        train_wavs = np.array(train_wavs)
        train_folds = np.array(train_folds)
        train_labels = np.array(train_labels)

        # Only the features are needed for (unsupervised) HMM fitting.
        train_x, _ = self.fix_frame(len(train_wavs), train_wavs, train_folds,
                                    train_labels)

        # Validation data
        valid_wavs, valid_folds, valid_labels = zip(*valid_set)
        valid_wavs = np.array(valid_wavs)
        valid_folds = np.array(valid_folds)
        valid_labels = np.array(valid_labels)

        valid_x, valid_y = self.fix_frame(len(valid_wavs), valid_wavs,
                                          valid_folds, valid_labels)

        if config.isPCA:
            # Fit the projection on training data only, then apply to both.
            pca = PCA(n_components=config.n_pca)
            pca.fit(train_x)
            train_x = pca.transform(train_x)
            valid_x = pca.transform(valid_x)

        hmm = GaussianHMM(n_components=self.component)
        hmm.fit(train_x)
        joblib.dump(hmm, f"{self.model_path}/hmm10-{k}.pkl")

        # One predicted state per sample: argmax over the state axis.  Without
        # ``axis=1`` argmax returns a single index into the flattened
        # posterior matrix, which cannot be compared with the per-sample
        # labels.  (valid_y is assumed to hold one row of class scores per
        # sample, as implied by the argmax over axis 1.)
        true_labels = np.argmax(valid_y, axis=1)
        pred_states = np.argmax(hmm.predict_proba(valid_x), axis=1)
        score = purity_score(true_labels, pred_states)
        print('Accuracy:{0:.3f}'.format(score))
Beispiel #4
0
class GaussHMM:
    """Gaussian HMM whose parameters are set from labelled data.

    ``init`` is the start-probability vector; its length is the number of
    hidden states.
    """

    def __init__(self, init):
        self.init = init

    def fit(self, signals, channels):
        """Build the HMM and set its parameters from ``signals``/``channels``.

        The estimator is first fitted on the first 100 samples and then its
        means, covariances, start probabilities and transition matrix are
        overwritten with values computed from the labels (the short fit is
        presumably only there to initialise the estimator -- TODO confirm).

        Args:
            signals: 1-D signal values.
            channels: State label of every sample in ``signals``.
        """
        self.hmm = GaussianHMM(n_components=len(self.init),
                               covariance_type="full",
                               n_iter=100)
        self.hmm.fit(np.array(signals).reshape([-1, 1])[:100])
        self.hmm.means_ = self.get_mean(signals, channels)
        self.hmm.covars_ = self.get_cov(signals, channels)
        self.hmm.startprob_ = self.init
        self.hmm.transmat_ = self.markov_p_trans(channels)

    def predict(self, signals):
        """Return the decoded hidden state of every sample in ``signals``."""
        return self.hmm.predict(signals.reshape([-1, 1]))

    def predict_proba(self, signals):
        """Return per-sample state posteriors rounded to 3 decimals."""
        return self.hmm.predict_proba(signals.reshape([-1, 1])).round(3)

    def get_mean(self, signals, channels):
        """Return the mean signal per channel as a column vector.

        Channels are addressed as ``0 .. len(unique(channels)) - 1``.
        """
        n_channels = len(np.unique(channels))
        sig_mean = [signals[channels == chan_i].mean()
                    for chan_i in range(n_channels)]
        return np.array(sig_mean).reshape([-1, 1])

    def get_cov(self, signals, channels):
        """Return the signal variance per channel with shape (n, 1, 1).

        Channels are addressed as ``0 .. len(unique(channels)) - 1``.
        """
        n_channels = len(np.unique(channels))
        sig_cov = [np.cov(signals[channels == chan_i])
                   for chan_i in range(n_channels)]
        return np.array(sig_cov).reshape([-1, 1, 1])

    def markov_p_trans(self, states):
        """Estimate the row-stochastic transition matrix of ``states``.

        Only genuinely adjacent pairs ``(states[t], states[t + 1])`` are
        counted; the sequence is not wrapped around, so the last sample does
        not contribute a transition back to the first one.

        Args:
            states: Sequence of integer state labels starting at 0.

        Returns:
            Array of shape ``(max_state + 1, max_state + 1)``.  A state with
            no outgoing transition gets a uniform row.
        """
        states = np.asarray(states)
        max_state = np.max(states)
        n_states = max_state + 1
        current, following = states[:-1], states[1:]
        bins = np.arange(max_state + 2)

        matrix = []
        for i in range(n_states):
            counts = np.histogram(following[current == i], bins=bins)[0]
            total = np.sum(counts)
            if total == 0:
                # State never observed with a successor: use uniform row.
                row = np.ones(n_states) / n_states
            else:
                row = counts / total  # normalize to 1
            matrix.append(row)
        return np.array(matrix)
Beispiel #5
0
def HHM_stock(stock, startdate, enddate, predict_startdate, predict_enddate,
              hmmcomponents=4, cov_type='full'):
    """Fit a Gaussian HMM on one date range and decode states on another.

    Price data is loaded with ``get_price`` and the fitted model is labelled
    with ``hhm_state2read``; both are defined outside this function.
    ``hhm_state2read`` is used here as a mapping from state index to a
    readable label.

    Args:
        stock: Security identifier passed to ``get_price``.
        startdate: Start of the fitting range.
        enddate: End of the fitting range.
        predict_startdate: Start of the prediction range.
        predict_enddate: End of the prediction range.
        hmmcomponents: Number of hidden states.
        cov_type: ``covariance_type`` passed to ``GaussianHMM``.

    Returns:
        ``(hmm, hhm_pred)`` -- the fitted model and a DataFrame indexed by
        date with columns ``close``, ``state`` (decoded state), ``score``
        (posterior probability of the decoded state) and ``action`` (readable
        label of the decoded state).
    """
    from hmmlearn.hmm import GaussianHMM
    import numpy as np
    import pandas as pd
    import warnings

    def get_hmm_feature(stock, startdate, enddate):
        """Return (close, dates, feature matrix), skipping the first 5 rows."""
        df = get_price(stock, start_date=startdate, end_date=enddate,
                       frequency='1d',
                       fields=['close', 'money', 'volume', 'high', 'low', 'open'],
                       skip_paused=True)
        close = df['close']
        money = df['money']
        high = df['high'][5:]
        low = df['low'][5:]
        datelist = pd.to_datetime(close.index[5:])
        closeidx = close[5:]

        log_close = np.log(np.array(close))
        # 1-row log return, aligned to row 5 onwards.
        logreturn = (log_close[1:] - log_close[:-1])[4:]
        # 5-row log return.
        logreturn5 = log_close[5:] - log_close[:-5]
        # Intraday log range.
        rangereturn = np.log(np.array(high)) - np.log(np.array(low))

        # 4-row rolling mean of turnover.  ``pd.rolling_mean`` no longer
        # exists in current pandas, so prefer ``Series.rolling`` and only
        # fall back to the old function on very old pandas versions.
        if hasattr(money, 'rolling'):
            money_ma5 = money.rolling(4).mean()
        else:
            money_ma5 = pd.rolling_mean(money, 4)
        # Turnover relative to the rolling mean ending on the previous row.
        money_ma5_rate = (np.log(np.array(money[5:]))
                          - np.log(np.array(money_ma5[4:-1])))

        features = np.column_stack(
            [logreturn, rangereturn, logreturn5, money_ma5_rate])
        return (closeidx, datelist, features)

    _, _, data_fit = get_hmm_feature(stock, startdate, enddate)
    closeidx_pred, datelist_pred, data_predict = get_hmm_feature(
        stock, predict_startdate, predict_enddate)

    # NOTE: this silences warnings for the whole process, not just this call.
    warnings.filterwarnings("ignore")
    hmm = GaussianHMM(n_components=hmmcomponents, covariance_type=cov_type,
                      n_iter=5000).fit(data_fit)

    hidden_state_meaning = hhm_state2read(hmm)

    _, predict_states_sequence = hmm.decode(data_predict)
    predict_all_scores_sequence = hmm.predict_proba(data_predict)
    # Posterior probability of the decoded state at every step.
    predict_states_score_sequence = [
        scores[state]
        for scores, state in zip(predict_all_scores_sequence,
                                 predict_states_sequence)
    ]

    hhm_pred = pd.DataFrame(
        {
            'close': closeidx_pred,
            'state': predict_states_sequence,
            'score': predict_states_score_sequence,
            'action': [hidden_state_meaning[s] for s in predict_states_sequence],
        },
        index=datelist_pred)
    return (hmm, hhm_pred)
Beispiel #6
0
class StockHMM:
    """Gaussian HMM over daily price/volume features read from a CSV file."""

    def __init__(self, stock=STOCK.Google):
        """Load the CSV that belongs to ``stock``.

        Raises:
            SystemError: If ``stock`` is not one of the supported values.
        """
        if stock == STOCK.Google:
            path = './data/GOOG.csv'
        elif stock == STOCK.Baidu:
            path = './data/BIDU.csv'
        elif stock == STOCK.Tencent:
            path = './data/TCEHY.csv'
        else:
            print('Invalid argument!')
            raise SystemError()

        # initialize data: one column per CSV field 1..6
        data, self.dates = self.get_data(path=path)
        self.open = data[:, 0]
        self.high = data[:, 1]
        self.low = data[:, 2]
        self.close = data[:, 3]
        self.adj_close = data[:, 4]
        # the number of stocks in stock transactions per day
        self.volume = data[:, 5]
        self.model = None

    def get_data(self, path):
        """Read the CSV at ``path``.

        The first line is skipped as a header.  Field 0 of every remaining
        line is kept as the date string and fields 1..6 are converted to
        floats.

        Returns:
            ``(values, dates)`` as numpy arrays.
        """
        # Context manager so the handle is closed even if reading fails.
        with open(path) as f:
            lines = f.readlines()

        x = []
        dates = []
        for line in lines[1:]:  # the first line is the header
            fields = line.split(',')
            x.append(np.double(fields[1:7]))
            dates.append(fields[0])
        return np.array(x), np.array(dates)

    def train(self, nc, n):
        """Fit an HMM with ``nc`` hidden states on the features of rows < n."""
        features = self.features_extraction(n)
        self.model = GaussianHMM(n_components=nc,
                                 covariance_type="full",
                                 n_iter=2000).fit(features)

    def features_extraction(self, n):
        """Return the (n - 5) x 3 feature matrix built from rows 5 .. n-1.

        Columns: log(high) - log(low), 5-row log difference of close, and
        5-row log difference of volume.  Requires ``5 < n < number of rows``.
        """
        assert 5 < n < self.high.shape[0]

        # log difference of high and low
        ld_hl = (np.log(self.high) - np.log(self.low))[5:n]
        # log difference of close (every 5 days)
        ld_c5 = np.log(self.close[5:n]) - np.log(self.close[:n - 5])
        ld_v5 = np.log(self.volume[5:n]) - np.log(self.volume[:n - 5])

        # concatenate to form features, dim: (n-5) * 3
        return np.column_stack([ld_hl, ld_c5, ld_v5])

    def predict(self, n):
        """Return the hidden-state distribution one step after row ``n - 2``.

        The posterior of the last row of ``features_extraction(n - 1)`` is
        propagated once through the transition matrix.  ``train`` must have
        been called first.
        """
        features = self.features_extraction(n - 1)
        hidden_states_proba = self.model.predict_proba(features)
        states = hidden_states_proba[-1, :]

        return states.dot(self.model.transmat_)
Beispiel #7
0
# close_v = np.reshape(close_v, (1, close_v.shape[0]))

print('_log_returns.shape', _log_returns.shape)
print('dates.shape', dates.shape)
print('close_v.shape', close_v.shape)

# Feature matrix: log returns in the first column, volume in the second.
X = np.column_stack([_log_returns, volume])

print("fitting to HMM and decoding ...", end="")

# Build a 3-state diagonal-covariance HMM, then fit it on the features.
model = GaussianHMM(n_components=3, covariance_type="diag", n_iter=1000)
model = model.fit(X)

# Despite its name, hidden_states holds the per-sample posterior probability
# of every hidden state (predict_proba), not a decoded state sequence.
hidden_states = model.predict_proba(X)
print('hidden_states.shape', hidden_states.shape)
#exit()
print("done")

print("Transition matrix")
print(model.transmat_)
print()

print("Means and vars of each hidden state")
for i in range(model.n_components):
    state_mean = model.means_[i]
    state_var = np.diag(model.covars_[i])
    print("{0}th hidden state".format(i))
    print("mean = ", state_mean)
    print("var = ", state_var)
    print()
Beispiel #8
0
test_data[:, 4] = dt.min_max_normalize(
    test_data[:, 4],
    method='tanh')  #2*(test_data[:, 4]-min_vol)/(max_vol-min_vol)-1

# The HMM is trained on column 5 only, as a single-feature matrix.
hmm_input_train = np.column_stack([train_data[:, 5]])
hmm_input_test = np.column_stack([test_data[:, 5]])

# Reuse a previously saved model when allowed and available; otherwise fit a
# new one and write it to hmm_model_file.
# NOTE(review): the freshly fitted model is dumped even when save_model is
# false -- confirm that this is intended.
if (save_model and os.path.isfile(hmm_model_file)):
    hmm_model = joblib.load(hmm_model_file)
else:
    hmm_model = GaussianHMM(n_components=hmm_components,
                            covariance_type="diag",
                            n_iter=1000).fit(hmm_input_train)
    joblib.dump(hmm_model, hmm_model_file)

# Per-sample posterior probability of every hidden state.
hmm_train = hmm_model.predict_proba(hmm_input_train)
hmm_test = hmm_model.predict_proba(hmm_input_test)

# Debug plot, disabled.  It indexes three state columns, so it needs at least
# three hidden states.
if (False):
    ax1 = plt.subplot(2, 1, 1)
    ax1.plot(df['close'].values[train_rows:], label='Close')
    #plt.set_autoscaley_on(True)

    ax2 = plt.subplot(2, 1, 2)
    # ax2.plot(test_data[:, 7])
    ax2.plot(hmm_test[:, 0], label='Hidden 0')
    ax2.plot(hmm_test[:, 1], label='Hidden 1')
    # Third state: label corrected from the copy-pasted 'Hidden 1'.
    ax2.plot(hmm_test[:, 2], label='Hidden 2')
    ax2.set_ylim([0, 1])

    plt.show()
Beispiel #9
0
model = model.fit(X)

# Report the sample shape, the configured number of hidden states and the
# fitted parameters.
print("样本量:")
print(X.shape)
print("给定的隐藏特征数目:")
print(n)
print("初始的隐藏状态概率π:")
print(model.startprob_)
print("状态转移矩阵A参数:")
print(model.transmat_)
print("估计均值:")
print(model.means_)
print("估计方差:")
print(model.covars_)

# Posterior state probabilities, decoded state sequence and the model score
# for X.
print("预测的概率:")
y = model.predict_proba(X)
print(y)
hidden_states = model.predict(X)
print("预测状态值:")
print(hidden_states)
print(model.score(X))

# The HMM only separates the data into distinct states; giving each state a
# real-world market meaning still requires human inspection.
# Draw every segment between consecutive closes in the colour of the state
# decoded at its starting point.
for j in range(len(close) - 1):
    segment_dates = [dates[j], dates[j+1]]
    segment_close = [close[j], close[j+1]]
    plt.plot(segment_dates, segment_close, color=colors[hidden_states[j]])
plt.show()

# import pandas as pd
# # data = pd.DataFrame({'datelist': dates, 'close': close, 'state': hidden_states}).set_index('dates')