Beispiel #1
0
def bench_gaussian_hmm(size):
    """Time sampling from, fitting and decoding a Gaussian HMM.

    A model with state means 42 and 24 (unit covariances) generates ``size``
    observations; a fresh two-component model is then fitted to that sample
    and used to decode it. Each phase runs inside ``timed_step``.

    Args:
        size: Number of observations to draw from the generating model.
    """
    banner = "benchmarking Gaussian HMM on a sample of size {0}".format(size)
    print(banner.center(36, " "))

    # Generating model: parameters are assigned directly instead of fitted.
    source = GaussianHMM()
    source.means_ = [[42], [24]]
    source.covars_ = [[1], [1]]

    with timed_step("generating sample"):
        observations, _ = source.sample(size)

    with timed_step("fitting"):
        fitted = GaussianHMM(n_components=2).fit([observations])

    with timed_step("estimating states"):
        fitted.predict(observations)
def mainHMM(filePrefix):
    """Fit a 4-state diagonal Gaussian HMM on one route and print the results.

    The model is trained on the first five columns of the training matrix and
    then decodes the first five columns of the test matrix. The first 80
    decoded states, the learned parameters and the full decoded sequence (as a
    column vector) are printed.

    Every ``print`` uses the parenthesised single-argument form, which parses
    on both Python 2 and Python 3; the original mixed print statements with
    print calls and was a SyntaxError on Python 3.

    Args:
        filePrefix: Passed through to ``loadOneRoute``.
    """
    X_train, length_train, X_test, length_test = loadOneRoute(filePrefix)

    # Run Gaussian HMM
    print("fitting to HMM and decoding ...")
    model = GaussianHMM(n_components=4, covariance_type="diag", n_iter=2000).fit(X_train[:, 0:5], length_train)
    hidden_states = model.predict(X_test[:, 0:5], length_test)
    print("done")

    # Show the first 80 decoded states in rows of 20.
    for begin in range(0, 80, 20):
        print(hidden_states[begin:begin + 20])

    # Print trained parameters
    print("Transition matrix")
    print(model.transmat_)
    print("Start Prob")
    print(model.startprob_)

    print("Means and vars of each hidden state")
    for i in range(model.n_components):
        print("{0}th hidden state".format(i))
        print("mean = ", model.means_[i])
        print("var = ", np.diag(model.covars_[i]))

    print(np.array(hidden_states).reshape((sum(length_test), 1)))
Beispiel #3
0
	def fit(self):
		"""Fit one GaussianHMM to all demonstrations and segment each of them.

		All demonstrations are stacked along axis 0 and passed to the HMM
		together with their individual lengths. Each demonstration is then
		decoded separately and its state sequence is handed to
		``self.findTransitions``.

		On return ``self.segmentation`` holds one entry per demonstration and
		``self.model`` holds the same fitted HMM once per demonstration.

		Raises:
			IndexError: if there are no demonstrations.
		"""
		if self.verbose:
			print("[Clustering] Clearing old model and segmentation")

		self.segmentation = []
		self.model = []

		g = GaussianHMM(n_components=self.n_components)

		lens = [np.shape(d)[0] for d in self._demonstrations]
		# Indexing element 0 first keeps the IndexError on empty input.
		all_demos = self._demonstrations[0]
		if len(self._demonstrations) > 1:
			# One concatenation instead of re-copying the growing array per demonstration.
			all_demos = np.concatenate(list(self._demonstrations))

		g.fit(all_demos, lens)

		new_segments = []
		new_model = []
		for d in self._demonstrations:
			new_segments.append(self.findTransitions(g.predict(d)))
			new_model.append(g)

		self.segmentation = new_segments
		self.model = new_model
Beispiel #4
0
def main(args):
    """Fit a left-to-right 3-state Gaussian HMM and write the decoded states.

    Reads ``x, X`` from ``loadDiffRows(args.diffFile)``, fits the model on
    ``X``, prints the learned transition matrix and writes one tab-separated
    row per decoded observation to ``args.outFile``
    (columns: exp, bin, reads, state).

    Args:
        args: Namespace with ``diffFile`` (input path) and ``outFile`` (output
            path). The experiment name is the part of the output file name
            before the first underscore.
    """
    x, X = loadDiffRows(args.diffFile)
    # 't' is deliberately left out of init_params: with the default ('stmc')
    # fit() re-initialises transmat_ and discards the left-to-right structure
    # set below. Entries that start at zero stay zero during EM, so no
    # post-fit zeroing (which would leave rows unnormalised) is needed.
    model = GaussianHMM(n_components=3,
                        covariance_type="diag",
                        n_iter=100000000000,
                        init_params="smc")
    model.transmat_ = numpy.array([[0.5, 0.5, 0.0],
                                   [0.0, 0.5, 0.5],
                                   [0.0, 0.0, 1.0]])
    model.fit(X)
    print(model.transmat_)

    exp = args.outFile.split('/')[-1].split('_')[0]
    with open(args.outFile, 'w') as fout:
        print('exp\tbin\treads\tstate', file=fout)
        for seq in X:
            hiddenStates = model.predict(seq)
            # NOTE(review): x is zipped against every sequence's states —
            # confirm x and each seq really share the same bins.
            for idx, (r, h) in enumerate(zip(x, hiddenStates)):
                print('\t'.join([exp, str(idx), str(r), str(h)]), file=fout)
Beispiel #5
0
class HMM:
    """Small wrapper that trains a GaussianHMM on a 1-D series and plots it."""

    # train() assigns data and hidden_states as well as model; all three must
    # be declared, otherwise a slotted instance rejects the assignment with
    # AttributeError.
    __slots__ = [
        "model",
        "data",
        "hidden_states",
    ]

    def __init__(self):
        pass

    def draw(self, data):
        """Plot ``data`` against its positional index as a red line."""
        figure()
        plot(range(len(data)), data, alpha=0.8, color='red')
        show()

    def train(self, data, n_components):
        """Fit a diagonal-covariance HMM to ``data`` and decode it.

        Args:
            data: One-dimensional sequence of observations; it is reshaped to
                a single-feature column before fitting.
            n_components: Number of hidden states.
        """
        # (data,) keeps the formatting correct when data itself is a tuple.
        print("Training Data: %s" % (data,))
        self.data = data
        self.model = GaussianHMM(n_components, algorithm='viterbi', covariance_type='diag')
        X = np.reshape(data, (len(data), 1))
        # NOTE(review): fit([X]) is the legacy list-of-sequences call; confirm
        # against the installed HMM library version.
        self.model = self.model.fit([X])

        self.hidden_states = self.model.predict(X)
        # The format string previously had no placeholder, which raised
        # "not all arguments converted during string formatting".
        print("Sequence of States: %s" % (self.hidden_states,))

    def eval(self, obs):
        """Print the model's score for the observation sequence ``obs``."""
        print("Testing Data: %s" % (obs,))
        X = np.reshape(obs, (len(obs), 1))
        print("Eval: %s" % str(self.model.score(X)))

    def plot(self):
        """Scatter the training data, one series per decoded hidden state."""
        fig = figure(facecolor="white")
        ax = fig.add_subplot(111)

        positions = np.arange(len(self.data))
        values = np.array(self.data)
        for i in range(self.model.n_components):
            # boolean mask selecting the samples decoded as state i
            idx = (self.hidden_states == i)
            ax.plot(positions[idx], values[idx], '.', label="State %d" % (i + 1))

        ax.legend()
        show()
class HmmClassifier():
    """Pick the reference sequence whose fitted HMM best explains an input."""

    def __init__(self, referenceSeqs, inputSeq):
        """Store the sequences and create the (re-fitted per reference) model.

        Args:
            referenceSeqs: Iterable of reference observation sequences.
            inputSeq: Observation sequence to classify.
        """
        self.referenceSeqs = referenceSeqs
        self.inputSeq = inputSeq

        # feel free to change this model
        self.model = GaussianHMM(n_components=2, covariance_type="full", n_iter=2000)

    def predict(self):
        """Return the index of the best-matching reference sequence.

        The model is fitted to each reference in turn and the input sequence
        is scored under it; the first index with the highest score wins.
        The Viterbi decode of each reference that used to run here was never
        used and has been removed.

        Raises:
            ValueError: if ``referenceSeqs`` is empty.
        """
        probs = []
        for referenceSeq in self.referenceSeqs:
            self.model.fit(referenceSeq)
            probs.append(self.model.score(self.inputSeq))

        # return the index of the max prob
        return probs.index(max(probs))
    def calculate_weights(self, date, amount):
        """Update the price history and derive a cash-plus-assets weight list.

        On the first call (``self.stacked`` is False) every trading date from
        ``self.start_date`` up to ``date`` is added to ``self.trainingDates``
        and the closing prices of each asset on those dates are loaded. On
        later calls only the closing price for ``date`` is appended.

        For each asset a two-state diagonal Gaussian HMM is fitted to the
        first differences of its closing prices. If the last decoded state is
        1, the asset's weight is ``transmat_[1][1]``; otherwise it is 0.

        Args:
            date: Date up to which prices are considered.
            amount: Starting cash amount.

        Returns:
            ``[remaining_money, weight_asset_1, weight_asset_2, ...]`` in the
            order of ``self.asset_codes``; also stored on ``self.weight``.
        """
        if self.stacked == False:
            for entry in self.tradingDates:
                if self.start_date <= entry.get('dt') <= date:
                    self.trainingDates.append(entry['dt'])
            for code in self.asset_codes:
                closes = []
                for day in self.trainingDates:
                    row = StockData.objects.filter(dt=day, ticker=code).values("price_close")[0]
                    closes.append(row['price_close'])
                self.historical_Data[code] = closes
            self.stacked = True
        else:
            for code in self.asset_codes:
                row = StockData.objects.filter(dt=date, ticker=code).values("price_close")[0]
                self.historical_Data[code].append(row['price_close'])

        target = {'money': amount}
        for code in self.asset_codes:
            close_v = np.array(self.historical_Data[code])
            X = np.column_stack([np.diff(close_v)])
            model = GaussianHMM(n_components=2, covariance_type="diag", n_iter=1000).fit(X)
            hidden_states = model.predict(X)
            # Weight is the self-transition probability of state 1, and only
            # when the series currently sits in state 1.
            stableProb = model.transmat_[1][1] if hidden_states[-1] == 1 else 0
            target[code] = stableProb
            target['money'] -= stableProb * close_v[-1]

        self.weight = [target['money']] + [target[code] for code in self.asset_codes]
        return self.weight
def hmmtest(trade_data, test_data):
    """Fit a 4-state full-covariance Gaussian HMM to ``test_data`` and plot it.

    Prints the mean and covariance diagonal of every hidden state, passes the
    model, decoded states and ``trade_data`` to ``plotHmmState`` and returns
    the fitted model.

    Args:
        trade_data: Forwarded unchanged to ``plotHmmState``.
        test_data: Observations used for both fitting and decoding; must have
            a ``'Strategy_Gross_Return_RDP_5'`` column.

    Returns:
        The fitted ``GaussianHMM``.
    """
    # NOTE(review): the infinity-filtered frame built on the next line is
    # immediately replaced by the unfiltered data, so the filter has no
    # effect — confirm whether that is intended.
    X = test_data[test_data['Strategy_Gross_Return_RDP_5'] != float("inf")]
    X = test_data

    # Fit the model, then decode the most likely hidden-state sequence.
    model = GaussianHMM(n_components=4, covariance_type='full', n_iter=1000).fit(X)
    hidden_states = model.predict(X)

    print("means and vars of each hidden state")
    for state, (mean, covar) in enumerate(zip(model.means_, model.covars_)):
        print("%dth hidden state" % state)
        print("mean = ", mean)
        print("var = ", np.diag(covar))

    plotHmmState(model, hidden_states, trade_data)

    return model
Beispiel #9
0
def _clean_returns(values):
    """Return ``values`` as a float array with inf, -inf and NaN set to 0."""
    arr = np.asarray(values, dtype=float)
    return np.where(np.isfinite(arr), arr, 0.0)


def _date_window(frame, start, end):
    """Return the rows of ``frame`` whose ``update_date`` is in [start, end]."""
    dates = frame['update_date']
    return frame.loc[(dates >= start) & (dates <= end), :]


def _return_features(frame):
    """Build cleaned (1-step, 5-step, high-low) log features from row 5 on."""
    log_close = np.log(np.asarray(frame['close']))
    r_5 = log_close[5:] - log_close[:-5]
    r_1 = (log_close[1:] - log_close[:-1])[4:]
    r_range = (np.log(np.asarray(frame['high'])) -
               np.log(np.asarray(frame['low'])))[5:]
    return _clean_returns(r_1), _clean_returns(r_5), _clean_returns(r_range)


def _annualised_sharpe(returns):
    """Sharpe-style ratio: 252 periods per year, 0.03 subtracted as the rate."""
    return (np.mean(returns) * 252 - 0.03) / (np.std(returns) * np.sqrt(252))


def hmm_weight(df, data_raw, day, n_components, plot=False):
    """Fit a Gaussian HMM on a training window and label a test window.

    The train/test date ranges come from ``train_test(day, df)``. Features are
    the 1-step log return, the 5-step log return and the log high/low range,
    with non-finite values replaced by 0. Each hidden state gets a
    Sharpe-style score computed from the next-step returns of the training
    rows decoded as that state.

    The feature clean-up is vectorised instead of ``np.array(map(...))``: on
    Python 3 ``map`` and ``filter`` return iterators, so the previous code
    produced 0-d object arrays and failed on the first slice.

    Args:
        df: Price frame with ``update_date``, ``open``, ``high``, ``low`` and
            ``close`` columns; used for the features.
        data_raw: Frame with the same columns; only used for plotting.
        day: Passed to ``train_test`` to derive the date windows.
        n_components: Number of hidden states.
        plot: When true, draw the train, test and signal charts.

    Returns:
        ``(prediction, highest, lowest)``. ``prediction`` is a DataFrame with
        ``update_date``, ``state`` and ``expected_sharpe`` columns for the
        test window. ``highest`` and ``lowest`` are ``(state, score)`` pairs
        with the largest and smallest non-NaN score. Returns
        ``(None, None, None)`` when either window is empty or has fewer
        feature rows than ``n_components``.

    Raises:
        IndexError: if every state score is NaN.
    """
    tr_start, tr_end, te_start, te_end = train_test(day, df)
    col_list = ['update_date', 'open', 'high', 'low', 'close']
    df = df.loc[:, col_list].dropna(axis=0)
    data_raw = data_raw.loc[:, col_list].dropna(axis=0)

    train_df = _date_window(df, tr_start, tr_end)
    test_df = _date_window(df, te_start, te_end)
    train_close = _date_window(data_raw, tr_start, tr_end)
    test_close = _date_window(data_raw, te_start, te_end)

    if len(train_df) == 0 or len(test_df) == 0:
        return None, None, None

    r_1, r_5, r_range = _return_features(train_df)
    # Next-step return for every training row; the last row has none, so 0.
    r_1_no_lag = np.append(r_1[1:], 0)
    date_list = train_df['update_date'][5:]

    r_1_test, r_5_test, r_range_test = _return_features(test_df)
    date_list_test = test_df['update_date'][5:]

    X = np.column_stack([r_1, r_5, r_range])
    X_test = np.column_stack([r_1_test, r_5_test, r_range_test])
    if X.shape[0] < n_components or X_test.shape[0] < n_components:
        return None, None, None

    hmm = GaussianHMM(n_components=n_components,
                      covariance_type='diag',
                      n_iter=2000).fit(X)
    latent_states_sequence_train = hmm.predict(X)

    if plot:
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.set_style('white')
        plt.figure(figsize=(15, 8))

    mean_return_dict = {}
    for i in range(hmm.n_components):
        state = (latent_states_sequence_train == i)
        sharpe = _annualised_sharpe(r_1_no_lag[state])
        mean_return_dict[i] = sharpe
        if plot:
            # NOTE(review): state has one entry per feature row (rows 5.. of
            # train_df) while train_close is not sliced — confirm the lengths
            # really match.
            plt.plot(date_list[state],
                     train_close['close'][state],
                     'o',
                     label='latent state %d: %s' % (i, sharpe),
                     lw=5)
            plt.legend()
            plt.grid(1)
    if plot:
        plt.show()

    latent_states_sequence_test = hmm.predict(X_test)

    # Rank states by score, ignoring states whose score is NaN.
    pair_sorted = sorted(
        [item for item in mean_return_dict.items() if not np.isnan(item[1])],
        key=lambda item: item[1])
    highest = pair_sorted[-1]
    lowest = pair_sorted[0]

    prediction = pd.DataFrame()
    prediction['update_date'] = date_list_test
    prediction['state'] = latent_states_sequence_test
    prediction['expected_sharpe'] = prediction['state'].apply(
        lambda x: mean_return_dict[x])

    if plot:
        sns.set_style('white')
        plt.figure(figsize=(8, 4))
        for i in range(hmm.n_components):
            state = (latent_states_sequence_test == i)
            plt.plot(date_list_test[state],
                     test_close['close'][state],
                     'o',
                     label='latent state %d: %s' %
                     (i, mean_return_dict[i]),
                     lw=5)
            plt.grid(1)
            plt.legend()
        plt.show()

        sns.set_style('white')
        plt.figure(figsize=(15, 10))
        new_frame = copy.deepcopy(prediction)
        new_frame.index = [new_frame['update_date']]
        new_frame['expected_return'] = new_frame[
            'expected_sharpe'].apply(lambda x: 30 if x > 0 else -30)
        test_close.index = [test_close['update_date']]
        # NOTE(review): 420 looks like a hand-picked display offset so prices
        # and the +/-30 bars share one axis — confirm.
        test_close['close'] = test_close['close'] - 420
        test_close = test_close[np.min(new_frame['update_date']):np.
                                max(new_frame['update_date'])]
        plt.plot(test_close['close'], 'o-', color='red')
        plt.bar(new_frame.index,
                new_frame['expected_return'],
                align='edge',
                alpha=0.5,
                color='yellow')
        plt.show()

    return prediction, highest, lowest
Beispiel #10
0
        user_id_list.append(line)
        length = int(file.readline())
        review_time = list()
        for i in range(length):
            info = file.readline().split('\t')
            review_time.append(
                datetime.datetime(int(info[3][:4]), int(info[3][5:7]),
                                  int(info[3][8:10])))
        X.append(interval(review_time))
        lengths.append(length - 1)
        file.readline()
# Stack the per-user arrays collected above into one observation matrix;
# `lengths` holds the number of rows each user contributed.
X = np.concatenate(X)

warnings.filterwarnings("ignore")
model = GaussianHMM(n_components=20, n_iter=10000, tol=1, verbose=True)
model.fit(X, lengths)
# Parameters are only printed when training reported convergence.
if model.monitor_.converged:
    print(model.transmat_)
    print(model.means_)
    print(model.covars_)
hidden_state = model.predict(X, lengths)

# Write each user id followed by that user's tab-separated decoded states.
# `start` is the offset of the current user's first row in hidden_state.
start = 0
with open("hidden_states.txt", 'w') as file:
    for i in range(len(user_id_list)):
        # NOTE(review): no separator is written after the id — presumably the
        # stored line still ends with its newline; confirm.
        file.write(user_id_list[i])
        for j in range(lengths[i]):
            file.write(str(hidden_state[start + j]) + '\t')
        start += lengths[i]
        file.write('\n\n')
Beispiel #11
0
        mus = np.flipud(mus)
        sigmas = np.flipud(sigmas)
        P = np.fliplr(np.flipud(P))
        hidden_states = 1 - hidden_states

    return hidden_states, mus, sigmas, P, logProb, samples

'''

# %%

Q = data.iloc[10, 6]

# Earlier approach, kept for reference:
# hidden_states, mus, sigmas, P, logProb, samples = fitHMM(Q, 100)
# Fit a four-state Gaussian HMM to Q as a single-feature column and decode it.
model = GaussianHMM(n_components=4, n_iter=500).fit(np.reshape(Q, (len(Q), 1)))
hidden_states = model.predict(np.reshape(Q, (len(Q), 1)))
# Pull the fitted parameters out as plain arrays; sigmas are the square roots
# of the covariance diagonals of the four states.
mus1 = np.array(model.means_)
sigmas = np.array(
    np.sqrt(np.array([np.diag(model.covars_[k]) for k in range(4)])))
P = np.array(model.transmat_)

# %%

print(model.covars_)
def hmmmodel(seq):
    """Fit a two-state Gaussian HMM to ``seq`` and decode it.

    Args:
        seq: Observation matrix passed unchanged to ``fit`` and ``predict``.

    Returns:
        ``(model, hidden_states)``: the fitted model and the decoded state
        sequence for ``seq``.
    """
    fitted = GaussianHMM(n_components=2, n_iter=1000)
    fitted.fit(seq)
    return fitted, fitted.predict(seq)
start = datetime.datetime(2013, 1, 1)
# pandas.datetime was only an alias of datetime.datetime; it was deprecated in
# pandas 1.0 and later removed, so the standard-library class is used directly.
end = datetime.datetime.today()
# NOTE(review): the 'google' data source may no longer be served by
# pandas-datareader — confirm it still works in the target environment.
df = web.DataReader("GOOGL", 'google', start, end)

datestart = '20130101'
dateend = '20160101'
# dates, close_v, volume_v, high_v, open_v, low_v = get_value_by_dates(df, datestart, dateend)
# X = np.column_stack([close_v, volume_v, high_v, open_v, low_v])
X, dates, close_v, volume_v, high_v, open_v, low_v = get_value_by_dates(
    df, datestart, dateend)
# init_params='m' lets fit() initialise only the means.
model = GaussianHMM(n_components=100,
                    covariance_type="tied",
                    n_iter=100,
                    init_params='m',
                    verbose=True).fit(X)
hidden_states = model.predict(X)
print(hidden_states)

# print("Transition matrix")
# print(model.transmat_)
# print()

print("Means and vars of each hidden state")
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()

# fig, axs = plt.subplots(model.n_components, sharex=True, sharey=True)
# colours = cm.rainbow(np.linspace(0, 1, model.n_components))
Beispiel #14
0
def pain_state(path, hmm_train_days=0, n_components=3):
    """Add or refresh a ``state`` column in a comma-separated price file.

    The file is read completely, processed in memory and rewritten in place.
    Columns 1-5 of each data row are read as open, high, low, close and
    volume. Rows with volume <= 0 are written back but never added to the
    price history.

    With ``hmm_train_days == 0`` (the default) no model is fitted: a
    ``state`` column filled with ``'0'`` is appended if the header does not
    already end in ``state``, otherwise rows are left unchanged.

    With ``hmm_train_days > 0`` the first ``hmm_train_days + 4`` traded rows
    only build up history. For every later traded row a Gaussian HMM is
    fitted to the last ``hmm_train_days`` rows of (5-step log volume change,
    5-step log close change, log close minus log open two rows back). The
    row's state becomes the 1-based rank of its decoded state when the states
    are ordered by mean 5-step log close change.

    Differences from the previous implementation:
      * lines are stripped with ``rstrip('\\n')`` instead of ``[:-1]``, so a
        last line without a newline no longer loses its final character;
      * the file is only opened for writing after all rows are computed, so a
        failure during fitting no longer leaves a truncated file;
      * the unused high/low feature is no longer computed.

    Args:
        path: File to read and overwrite.
        hmm_train_days: Rolling window length; 0 disables the HMM.
        n_components: Number of hidden states.
    """
    with open(path, 'r') as f:
        titles = f.readline().rstrip('\n').split(',')
        need_to_append = titles[-1] != 'state'
        if need_to_append:
            titles.append('state')
        datas = []
        for raw in f:
            row = raw.rstrip('\n').split(',')
            if need_to_append:
                row.append('0')
            datas.append(row)

    open_price = []
    high_price = []
    low_price = []
    close_price = []
    volume = []

    def record(row):
        """Append ``row`` to the price history if it traded; report whether it did."""
        v = float(row[5])
        if v > 0:
            open_price.append(float(row[1]))
            high_price.append(float(row[2]))
            low_price.append(float(row[3]))
            close_price.append(float(row[4]))
            volume.append(v)
            return True
        return False

    # Warm-up: collect history without fitting. With hmm_train_days == 0 this
    # loop consumes every row, so the fitting loop below never runs.
    index = 0
    warmup_rows = hmm_train_days + 4
    while index < len(datas) and (len(close_price) < warmup_rows
                                  or hmm_train_days == 0):
        record(datas[index])
        index += 1

    days = hmm_train_days
    while index < len(datas):
        if record(datas[index]):
            log_close = np.log(np.array(close_price[-days:]))
            logRet_2 = log_close - np.log(np.array(open_price[-days - 2:-2]))
            logRet_5 = log_close - np.log(np.array(close_price[-days - 5:-5]))
            logVol_5 = np.log(np.array(volume[-days:])) - np.log(
                np.array(volume[-days - 5:-5]))
            A = np.column_stack([logVol_5, logRet_5, logRet_2])
            model = GaussianHMM(n_components=n_components,
                                covariance_type='full',
                                n_iter=16).fit(A)
            # Order states by the mean of feature 1 (logRet_5); the written
            # state is the 1-based position of the current state in that order.
            ids = np.argsort(np.array([ele[1] for ele in model.means_]))
            state = list(ids).index(model.predict(A)[-1]) + 1
            datas[index][-1] = "%d" % state
        index += 1

    with open(path, 'w') as f:
        f.write('%s\n' % (','.join(titles)))
        for row in datas:
            f.write('%s\n' % (','.join(row)))
Beispiel #15
0
# Plot the raw closing prices and save the figure.
close_v = data['close'].values
volume = data['volume'].values
dates = np.array([i for i in range(data.shape[0])])
fig1 = plt.figure()
plt.plot(close_v, color='blue')
plt.show()
# NOTE(review): savefig is called after plt.show(); on some backends the
# figure may already be cleared by then — confirm the SVG is not blank.
fig1.savefig('stocks.svg')

# Prepare the data: first differences of the close, with the other series
# trimmed by one element so all have the same length.
diff = np.diff(close_v)
dates = dates[1:]
close_v = close_v[1:]
volume = volume[1:]
x = np.column_stack([diff, volume])
diff = diff.reshape(-1, 1)  # 2-D matrix with a single column

model = GaussianHMM(n_components=2,
                    n_iter=1000)  # n_components: number of hidden states; n_iter: number of iterations
model.fit(diff)
hidden_states = model.predict(diff)
fig2 = plt.figure()
colors = ['yellow', 'blue']
# Draw each price segment in the colour of the state decoded at its left end.
for j in range(len(close_v) - 1):
    for i in range(model.n_components):
        if hidden_states[j] == i:
            plt.plot([dates[j], dates[j + 1]], [close_v[j], close_v[j + 1]],
                     color=colors[i])
plt.show()
fig2.savefig('hidden_state.svg')
# The two states separate oscillating periods from sharp rises and falls.
# +
# Fit a two-state Gaussian HMM to dx as a single-feature column.
# NOTE(review): the zero means_prior with a very large means_weight presumably
# pins both state means near 0 so the states differ in variance only — confirm.
hmm = GaussianHMM(n_components=2,
                  means_prior=np.zeros((1, 1)),
                  means_weight=1e10).fit(dx.reshape(-1, 1))

# rearrange the volatility from small to large
sigma2 = hmm.covars_.flatten()
idx = np.argsort(sigma2)
sigma2 = sigma2[idx]
p = hmm.transmat_[np.ix_(idx, idx)]  # transition matrix, reordered like sigma2
# -

# ## [Step 3](https://www.arpm.co/lab/redirect.php?permalink=s_hidden_markov_model_stocks-implementation-step03): Compute the hidden status

# Decode the states, then relabel them through idx.
z_ = hmm.predict(dx.reshape(-1, 1))
z = z_.copy()
z[z_ == 0] = idx[0]
z[z_ == 1] = idx[1]

# ## Plots

# +
plt.style.use('arpm')

# Two copies of dx, each with the samples of one relabelled state blanked out.
panic = dx.copy()
calm = dx.copy()
panic[z == 0] = np.nan
calm[z == 1] = np.nan

fig = plt.figure()
# Parenthesised so the line parses on Python 3 as well; the rest of this
# script already relies on print() calls.
print(oddsr1)

#7. Evaluate the effect of the online channel and billpay on customer's retention using a hidden Markov model (HMM) with the variables 9Online, 9Billpay, 0Online, 0Billpay and the new variable Retain. Retain takes a value of 0 when 0Profit has a missing observation and 1 otherwise.

#print pd.value_counts(df['0Online'].isnull())

#print pd.value_counts(df['0Billpay'].isnull())

# Missing 0Online / 0Billpay values are replaced by 0.
df['0OnlineNA'] = np.where(df['0Online'].isnull(), 0, df['0Online'])
df['0BillpayNA'] = np.where(df['0Billpay'].isnull(), 0, df['0Billpay'])
df.head()

from hmmlearn.hmm import GaussianHMM
group = df[['9Online', '9Billpay', '0OnlineNA', '0BillpayNA', 'Retain']]
model = GaussianHMM(n_components=4, covariance_type="diag").fit(group)
hidden_states = model.predict(group)

# Report the mean and covariance diagonal of every hidden state.
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()

#hdf=pd.DataFrame(hidden_states)
#hdf.describe() #verifying the meaning of the hidden states

#8. Build a transition matrix (online, billpay) from 1999 to 2000 from those different customers' states:those that were online, offline without electronic billpay, online with electronic billpay for 2000, customers who left the bank. Explain the billpay effect on customers' retention.
print("Transition matrix")
print(model.transmat_)
print()
Beispiel #18
0
        variances.append(np.var(dists))
    return variances


if __name__ == "__main__":
    # Cluster the feature rows with a two-state HMM and write, per row,
    # whether it belongs to the lower-variance cluster.
    parser = argparse.ArgumentParser()
    # Both paths are required: Path(None) raised a TypeError when either
    # option was omitted.
    parser.add_argument("--features-path", type=str, required=True)
    parser.add_argument("--save-path", type=str, required=True)

    # Parse first so usage errors are reported before the logging config is read.
    args = parser.parse_args()

    fileConfig('logging_config.ini')

    save_path = Path(args.save_path)
    # parents/exist_ok: also create missing ancestors and tolerate an
    # already-existing directory.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    features_path = Path(args.features_path)
    X = np.load(str(features_path))
    hmm_n_clusters = 2
    hmm = GaussianHMM(n_components=hmm_n_clusters, covariance_type="diag")
    hmm.fit(X)
    hmm_preds = hmm.predict(X)
    variances = get_cluster_variances(X, hmm_preds, hmm.means_)
    # The cluster with the smallest variance is treated as the "court" cluster.
    court_cluster_id = np.argmin(variances)
    mask = hmm_preds == court_cluster_id
    mask = [{'action': int(i)} for i in mask]
    utils.write_json_lines(mask, save_path)

    # np.save(str(save_path), mask)
Beispiel #19
0
    def test_hmm(self):
        """Fit a 6-state Gaussian HMM to index features and plot the states.

        Index data for ticker 000001 and the summed ``tradeVal`` of the XSHE
        and XSHG exchanges are fetched for 2010-04-01 to 2016-03-17. The
        feature columns are: 1-step log return, 5-step log return, log
        high/low spread, turnover volume and 1-step log change of the summed
        ``tradeVal``. All are aligned to start at row 5. The close index is
        then plotted against the trade date, one series per decoded state.
        """
        sns.set_style('white')

        first_day, last_day = '20100401', '20160317'
        index_fields = [
            'tradeDate', 'closeIndex', 'lowestIndex', 'highestIndex',
            'turnoverVol'
        ]
        data = DataAPI.MktIdxdGet(ticker='000001',
                                  beginDate=first_day,
                                  endDate=last_day,
                                  field=index_fields,
                                  pandas="1")
        # One frame per exchange, fetched in the order XSHE then XSHG.
        per_exchange = [
            DataAPI.FstTotalGet(exchangeCD=code,
                                beginDate=first_day,
                                endDate=last_day,
                                field=['tradeVal'],
                                pandas="1")
            for code in (u"XSHE", u"XSHG")
        ]
        trade_val = per_exchange[0] + per_exchange[1]

        trade_dates = pd.to_datetime(data['tradeDate'][5:])
        turnover = data['turnoverVol'][5:]
        close_all = data['closeIndex']

        high_low_spread = (np.log(np.array(data['highestIndex'])) -
                           np.log(np.array(data['lowestIndex'])))[5:]
        log_ret_1 = np.array(np.diff(np.log(close_all)))[4:]
        log_ret_5 = (np.log(np.array(close_all[5:])) -
                     np.log(np.array(close_all[:-5])))
        log_ret_val = np.array(np.diff(np.log(trade_val['tradeVal'])))[4:]
        close_from_5 = close_all[5:]

        features = np.column_stack(
            [log_ret_1, log_ret_5, high_low_spread, turnover, log_ret_val])

        # Fit on the feature matrix wrapped in a list, then decode it.
        fitted = GaussianHMM(n_components=6,
                             covariance_type="diag",
                             n_iter=1000).fit([features])
        decoded = fitted.predict(features)

        plt.figure(figsize=(15, 8))
        for state in range(fitted.n_components):
            in_state = (decoded == state)
            plt.plot_date(trade_dates[in_state],
                          close_from_5[in_state],
                          '.',
                          label='%dth hidden state' % state,
                          lw=1)
            plt.legend()
            plt.grid(1)
deltaIndex = np.log(np.array(data['highestIndex'])) - np.log(
    np.array(data['lowestIndex']))  # same-day log high/low spread
deltaIndex = deltaIndex[5:]
logReturn1 = np.array(np.diff(np.log(closeIndex)))  # 1-step log return
logReturn1 = logReturn1[4:]
logReturn5 = np.log(np.array(closeIndex[5:])) - np.log(
    np.array(closeIndex[:-5]))  # 5-step log return
logReturnFst = np.array(np.diff(np.log(tradeVal['tradeVal'])))[4:]
closeIndex = closeIndex[5:]
X = np.column_stack([logReturn1, logReturn5, deltaIndex, volume,
                     logReturnFst])  # combine the feature arrays into one 2-D array
# Make an HMM instance and execute fit
# NOTE(review): fit([X]) is the legacy list-of-sequences call; confirm against
# the installed HMM library version.
model = GaussianHMM(n_components=3, covariance_type="diag",
                    n_iter=1000).fit([X])
# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)
# Parenthesised so the line parses on Python 3 as well as Python 2.
print(hidden_states)
res = pd.DataFrame({
    'tradeDate': tradeDate,
    'logReturn1': logReturn1,
    'logReturn5': logReturn5,
    'volume': volume,
    'hidden_states': hidden_states
}).set_index('tradeDate')
for i in range(model.n_components):
    idx = (hidden_states == i)
    # Shift the signal by one step: act on the day after the state is observed.
    idx = np.append(0, idx[:-1])
    # Fast factor backtest: per-state signal return and its cumulative growth.
    df = res.logReturn1
    res['sig_ret%s' % i] = df.multiply(idx, axis=0)
    res['sig_cumret%s' % i] = np.exp(res['sig_ret%s' % i].cumsum())
def train_a_stock(key, num_hidden_states):
    """Repeatedly fit a Gaussian HMM for one symbol and plot its predictions.

    For up to ``nbpredict`` steps a fresh ``GaussianHMM`` is fitted on the
    window returned by ``stock.build_training``.  The most likely next hidden
    state is read from the transition-matrix row of the last decoded state,
    and that state's mean feature vector is turned into a predicted
    open/close.  The window end is then updated from ``train_df.nextdata``.
    Predictions are merged with the real rows for ``key`` on ``date``,
    plotted and printed.

    Args:
        key: Symbol to train on (matched against ``stock.df['symbol']``).
        num_hidden_states: Number of HMM components.

    Returns:
        The prediction dict of the last step that completed, or ``None`` if
        no step completed.
    """
    stock = StockDb()
    stock.df['fracchange'] = (stock.df['close'] -
                              stock.df['open']) / stock.df['open']
    stock.df['fraclow'] = (stock.df['low'] -
                           stock.df['open']) / stock.df['open']
    stock.df['frachigh'] = (stock.df['high'] -
                            stock.df['open']) / stock.df['open']
    stock.df['frachighlow'] = (stock.df['high'] -
                               stock.df['low']) / stock.df['low']
    stock.df['delta-volume'] = stock.df['volume'].diff().fillna(0)
    stock.df['delta-open'] = stock.df['open'].diff().fillna(0)
    stock.df['delta-close'] = stock.df['close'].diff().fillna(0)
    # NOTE(review): shift(-1) takes the close of the *following* row; whether
    # that is the previous or the next trading day depends on how stock.df is
    # sorted -- confirm.
    stock.df['delta-closeopen'] = stock.df['open'] - stock.df['close'].shift(
        -1).fillna(0)

    startdate = datetime.datetime(2018, 3, 6, 0, 0)
    enddate = datetime.datetime(2018, 4, 2, 0, 0)
    nbpredict = 40

    features = ['delta-closeopen', 'fracchange', 'delta-volume']
    features_predict = ['date', 'open', 'low', 'high', 'close']

    rows = []          # one prediction dict per completed step
    result = None      # last completed prediction
    train_df = None    # stays None if the very first window cannot be built
    for _ in range(nbpredict):
        try:
            train_df = stock.build_training(key, startdate, enddate, features)
            X, lengths = train_df.seq_len_dict(key)
            model = GaussianHMM(n_components=num_hidden_states,
                                n_iter=1000).fit(X, lengths)
            log_likelihood = model.score(X, lengths)
            state_sequence = model.predict(X, lengths)
            # Transition probabilities out of the last decoded state.
            prob_next_step = model.transmat_[state_sequence[-1], :]
            # The most probable next state is the argmax of that row, so it
            # matches the probability reported below (the first state of the
            # training sequence says nothing about the next step).
            state_most_probable = int(prob_next_step.argmax())
            state_features = model.means_[state_most_probable]

            delta_closeopen = state_features[features.index(
                'delta-closeopen')]
            fracchange = state_features[features.index('fracchange')]
            predicted_open = (train_df.nextdata['close'].iloc[-1] +
                              delta_closeopen)
            row = {
                'date': train_df.nextdata['date'].iloc[-2],
                'logL': log_likelihood,
                'nbState': model.n_components,
                'state_most_probable': state_most_probable,
                'prob_state_most_probable': max(prob_next_step),
                'open': predicted_open,
                'close': predicted_open * (1 + fracchange),
                'fracchange': fracchange,
                'delta-closeopen': delta_closeopen,
            }
        except Exception as exc:
            # Best effort: report the failed step and carry on, but let
            # KeyboardInterrupt / SystemExit propagate.
            print("err", exc)
        else:
            rows.append(row)
            result = row

        if train_df is None:
            # No window was ever built, so the end date cannot be updated.
            break
        enddate = train_df.nextdata['date'].iloc[-2]

    # Build the frame in one go (DataFrame.append no longer exists in
    # pandas 2) and keep the expected columns first, even when empty.
    predict_df = pd.DataFrame(rows)
    ordered_columns = features_predict + [
        col for col in predict_df.columns if col not in features_predict
    ]
    predict_df = predict_df.reindex(columns=ordered_columns)

    real_df = stock.df[stock.df['symbol'] == key]
    # Columns present on both sides get the _x (real) / _y (predicted) suffix.
    res_df = pd.merge(real_df, predict_df, on='date')
    res_df['err'] = (res_df['fracchange_x'] -
                     res_df['fracchange_y']) / res_df['fracchange_x']
    print("----------------------------------------")

    # ========================
    # Plot the data
    # ========================
    fig, axes = plt.subplots(nrows=1, ncols=2)

    res_df.plot(kind='line',
                color='Blue',
                x='date',
                y='fracchange_x',
                ax=axes[0],
                title='fracchange')
    res_df.plot(kind='line',
                color='red',
                x='date',
                y='fracchange_y',
                ax=axes[0])

    # The right-hand panel shows the relative error, so label it as such.
    res_df.plot(kind='line',
                color='Blue',
                x='date',
                y='err',
                ax=axes[1],
                title='err')

    plt.show()

    print(res_df[['date', 'fracchange_x', 'fracchange_y']])

    return result
Beispiel #22
0
import numpy as np
from numpy import genfromtxt
from hmmlearn.hmm import GaussianHMM

# Each label row is (run number, step number, x, y), with 1-based run/step.
label_data = genfromtxt('Label', delimiter=',')
# observation_data[run][step] holds the angle observed at that run and step.
observation_data = genfromtxt('Observations.csv', delimiter=',')

# One list of [x, y, angle] samples per run.
runs = [[] for _ in range(6000)]
# Iterate the rows directly; the loop variable is named `row` so the builtin
# `tuple` is no longer shadowed for the rest of the module.
for row in label_data:
    run = int(row[0]) - 1
    step = int(row[1]) - 1
    angle = observation_data[run][step]
    x = row[2]
    y = row[3]
    runs[run].append([x, y, angle])

# Fit on the first run, then decode the second run with the fitted model.
model2 = GaussianHMM(n_components=4, covariance_type="diag", n_iter=1000).fit(runs[0])
preds = model2.predict(runs[1])

print(preds)


Beispiel #23
0
# Training dates with the first five rows dropped.
# NOTE(review): presumably this matches the rows dropped when r_1 / r_5 /
# r_range were built earlier in the script (not visible here) -- confirm.
date_list = train_data['update_date'][5:]

# Test-set features.
# 5-day log return: log(close[t]) - log(close[t-5]).
r_5_test = np.array(np.log(test_data['close'][5:])) - np.array(
    np.log(test_data['close'][:-5]))
# 1-day log return; the extra [4:] gives it the same length as r_5_test.
r_1_test = (np.array(np.log(test_data['close'][1:])) -
            np.array(np.log(test_data['close'][:-1])))[4:]
# Same-day log high/low range, first five rows dropped.
r_range_test = (np.array(np.log(test_data['high'])) -
                np.array(np.log(test_data['low'])))[5:]
date_list_test = test_data['update_date'][5:]

# Training observations, one column per feature (r_1, r_5 and r_range are
# defined before this snippet).
X = np.column_stack([r_1, r_5, r_range])

# Test observations with the same column order as X.
X_test = np.column_stack([r_1_test, r_5_test, r_range_test])

# Fit a 13-state diagonal-covariance HMM on the training features and decode
# the training sequence with it.
hmm = GaussianHMM(n_components=13, covariance_type='diag', n_iter=5000).fit(X)
latent_states_sequence_train = hmm.predict(X)
# Bare expression: the value is discarded when this runs as a script.
len(latent_states_sequence_train)

sns.set_style('white')

mean_return_dict = {}
plt.figure(figsize=(15, 8))
# One marker series per latent state: closing price on the dates decoded as
# that state.
for i in range(hmm.n_components):
    state = (latent_states_sequence_train == i)
    # NOTE(review): `state` has one entry per row of X, while
    # train_data['close'] is not sliced here; if X was built from rows [5:]
    # the mask length will not match -- confirm whether
    # train_data['close'][5:] was intended.
    plt.plot(date_list[state],
             train_data['close'][state],
             'o',
             label='latent state %d' % i,
             lw=1)
    plt.legend()
    plt.grid(1)
Beispiel #24
0
@author: mac
"""

#%%
# Load the demo data; only the second column is plotted and modelled.
df = pd.read_csv('trainDemo.csv', encoding="utf-8")
df.iloc[:, 1].plot()
# Turn the 1-D series into a single-column 2-D array.
dataset_X = df.iloc[:, 1].values.reshape(-1, 1)

print(dataset_X.shape)
#%%
from hmmlearn.hmm import GaussianHMM
# Fit an 8-state diagonal-covariance HMM on the single-feature series.
model = GaussianHMM(n_components=8, covariance_type="diag",
                    n_iter=1000).fit(dataset_X)
#%%
hidden_states = model.predict(dataset_X)
#%%
# Print the fitted mean and variance of each hidden state (labels start at 1).
for i in range(model.n_components):
    mean = model.means_[i][0]
    variance = np.diag(model.covars_[i])[0]
    summary = 'Hidden state: {}, Mean={:.3f}, Variance={:.3f}'
    print(summary.format(i + 1, mean, variance))
#%%
# Generate synthetic data from the fitted HMM and plot the feature.
N = 2348
samples, _ = model.sample(N)
plt.plot(samples[:, 0])

#%%
print(samples)
import numpy
Beispiel #25
0

# HMMMLearn
####################################################################################
####################################################################################
####################################################################################

import numpy as np
from hmmlearn.hmm import GaussianHMM

# Observations as an array (x_train is defined before this snippet).
new_x = np.asarray(x_train)

n_comps = 6
model = GaussianHMM(n_comps)
# Pass the array itself: wrapping it in a list produces a 3-D input, which
# current hmmlearn rejects.
model.fit(new_x)
# Most likely hidden state for every observation.
hidden_states = model.predict(new_x)


###############################################################################
# print trained parameters and plot

import pylab as pl
from matplotlib.finance import quotes_historical_yahoo
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter

print("Transition matrix")
print(model.transmat_)
print()

print("means and vars of each hidden state")
for i in range(n_comps):
Beispiel #26
0
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
from hmmlearn.hmm import GaussianHMM
import numpy as np

#samples:
# Sample observations: ten points with two features each.
X = np.array([[-1.03573482, -1.03573482], [6.62721065, 11.62721065],
              [3.19196949, 8.19196949], [0.38798214, 0.38798214],
              [2.56845104, 7.56845104], [5.03699793, 10.03699793],
              [5.87873937, 10.87873937], [4.27000819, -1.72999181],
              [4.02692237, -1.97307763], [5.7222677, 10.7222677]])

# Train a new 3-state, diagonal-covariance model on the samples.
model = GaussianHMM(n_components=3, covariance_type="diag")
model.fit(X)

# Build an unfitted model of the same shape and copy the trained parameters
# into it one attribute at a time.
new_model = GaussianHMM(n_components=3, covariance_type="diag")
for _attr in ("startprob_", "transmat_", "means_"):
    setattr(new_model, _attr, getattr(model, _attr))
# m (private attribute), n (public property) and p (constructor parameters)
# are read from the trained model; only m is used below.
m = model._covars_
n = model.covars_
p = model.get_params()
new_model.covars_ = m

# Decode X with the copied model.
X_N = new_model.predict(X)

print(X_N)
Beispiel #27
0
import pylab as pl
import numpy as np
from hmmlearn.hmm import GaussianHMM
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import nyc

###############################################################################
# print trained parameters and plot
###############################################################################

# Observations as an array (train_set is defined outside this snippet).
new_x = np.asarray(train_set)

n_comps = 6
model = GaussianHMM(n_comps)
# Pass the array itself: wrapping it in a list produces a 3-D input, which
# current hmmlearn rejects.
model.fit(new_x)
hidden_states = model.predict(new_x)

# Per-state emission parameters: mean vector and the diagonal of the
# covariance matrix.
print("means and vars of each hidden state")
for i in range(n_comps):
    print("%dth hidden state" % i)
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()

# Date tick helpers and the figure/axes used by the plotting code that
# follows.
years = YearLocator()   # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')
fig = pl.figure()
ax = fig.add_subplot(111)

ald = np.asarray(all_days)
Beispiel #28
0
endTime = '2018-12-1'
# beginTime is defined before this snippet.
data = tu.get_hist_data('sh', beginTime, endTime, 'D')
high = data['high']
low = data['low']

volume = data['volume']
# Close prices and dates with the first five rows dropped.
close = data['close'][5:]
Date = pd.to_datetime(data.index[5:])
print(Date)
# Same-day log high/low spread, first five rows dropped.
logDel = (np.log(np.array(high)) - np.log(np.array(low)))[5:]
logRet1 = np.array(np.diff(np.log(close)))[5:]
# 5-row log return of the index.
# NOTE(review): `close` is already trimmed by five rows above, so this series
# starts five rows later than logDel / logVol5 -- confirm the alignment.
logRet5 = np.log(np.array(close[5:])) - np.log(np.array(close[:-5]))
# 5-row log change of the index trading volume.
logVol5 = np.log(np.array(volume[5:])) - np.log(np.array(
    volume[:-5]))
# `density` replaces the `normed` keyword that matplotlib removed.
plt.hist(logVol5, 200, density=True, facecolor='green', alpha=0.75)
# plt.show()
# print(logDel)
# Stack the first 100 rows of each 1-D feature into one 2-D matrix.
A = np.column_stack([logDel[:100], logRet5[:100], logVol5[:100]])
print(A)
model = GaussianHMM(n_components=6, covariance_type='diag', n_iter=2000).fit(A)
hidden_states = model.predict(A)
print(hidden_states)
plt.figure(figsize=(10, 5))
sns.set_style('white')
# Only the rows that were decoded can be plotted, so trim dates and prices
# to the length of hidden_states before applying the per-state mask.
n_decoded = len(hidden_states)
plot_dates = Date[:n_decoded]
plot_close = close[:n_decoded]
for i in range(model.n_components):
    pos = (hidden_states == i)
    # Plot every point decoded as state i (not just the i-th sample).
    plt.plot(plot_dates[pos], plot_close[pos], 'o',
             label='hidden state %d' % i)
plt.legend()
plt.grid(True)
plt.show()
#spx_ret = spx_ret * 1000.0
# Single-feature observation matrix: one column of returns (spx_ret is
# defined before this snippet).
rets = np.column_stack([spx_ret])

# Create the Gaussian Hidden Markov Model and fit it
# to the returns data, outputting a score
hmm_model = GaussianHMM(
    n_components=3,                     # number of states
    covariance_type="full",             # full covariance matrix vs diagonal
    n_iter=1000                         # number of iterations
).fit(rets)

print("Model Score:", hmm_model.score(rets))

# Predict the in-sample hidden states array
hidden_states = hmm_model.predict(rets)

# Share of samples decoded as state 1.  With three states the mean of the
# labels is not that share, so count the matches explicitly.
print('Percentage of hidden state 1 = %f' % np.mean(hidden_states == 1))

print("Transition matrix")
print(hmm_model.transmat_)

print("Means and vars of each hidden state")
for i in range(hmm_model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", hmm_model.means_[i])
    print("var = ", np.diag(hmm_model.covars_[i]))

# One subplot and one colour per hidden state.
fig, axs = plt.subplots(hmm_model.n_components, sharex=True, sharey=True)
colours = cm.rainbow(np.linspace(0, 1, hmm_model.n_components))
for i, (ax, colour) in enumerate(zip(axs, colours)):
def predict_states(X,group_id,empirical_states):
    """Decode a two-state Gaussian HMM on X and pad the result with a forecast.

    Args:
        X: 2-D observation array passed to ``GaussianHMM.fit`` / ``decode``.
        group_id: Sets the output length, ``(group_id + 1) * 10``.
        empirical_states: Reference 0/1 labels compared element-wise with the
            decoded states; used only to decide whether the decoded labels
            should be flipped.

    Returns:
        list: the decoded (possibly flipped) states followed by the forecast
        state repeated until the list has ``(group_id + 1) * 10`` entries;
        nothing is appended if it is already that long.
    """
    max_state_number = (group_id + 1) * 10
    n_components = 2
    n_fits = 1  # number of fits; the best-scoring decode is kept

    # make an HMM instance
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    best_score = None
    best_states = None
    transmat = None
    for _ in range(n_fits):
        # Pass the array itself: a list-wrapped X is a 3-D input, which
        # current hmmlearn rejects.
        model.fit(X)
        # decode() returns both the log probability and the state sequence,
        # so a separate predict() call is not needed.
        score, states = model.decode(X)
        if best_score is None or best_score < score:
            best_score = score
            best_states = states
            transmat = model.transmat_

    # HMM state labels are arbitrary, so compare both labelings with the
    # empirical states and keep the one with the smaller squared difference.
    decoded = best_states.tolist()
    flipped = [0 if s == 1 else 1 for s in decoded]
    reference = np.array(empirical_states)
    difference = np.sum(np.power(np.array(decoded) - reference, 2))
    difference_inver = np.sum(np.power(np.array(flipped) - reference, 2))
    if difference_inver < difference:
        decoded = flipped

    # Predict future state.
    # NOTE(review): this always uses the transition row of state 1,
    # regardless of the last decoded state or of the flip above -- confirm.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 1 if future_states_proba[1] > future_states_proba[0] else 0

    padding = max(0, max_state_number - len(decoded))
    return decoded + [future_state] * padding
Beispiel #31
0
def MyGaussianHMM():
    """Walk through the three classic HMM problems on the SZIndex data set.

    Part 1 fits a 6-state Gaussian HMM to the first five CSV columns, prints
    the learned parameters and plots the close index coloured by hidden
    state.  Part 2 reuses that model to score one observation and to decode
    the first ten days, then builds a second model from hand-written
    parameters.  Nothing is returned.
    """
    from hmmlearn.hmm import GaussianHMM
    # The CSV has no header row; header=None is the supported way to say so
    # (a negative header value is rejected by pandas).
    df = pd.read_csv(
        "/home/ray/Documents/suibe/2017/建模/Modeling_Preparation/dataset/SZIndex.csv",
        header=None)
    X = np.array(df.iloc[:, 0:5])

    # Part 1: the model is unknown -- solve problem 3 (learning).
    #
    # covariance_type options:
    #   "spherical": one variance per state, shared by all features
    #   "diag":      diagonal covariance matrix per state (usual compromise)
    #   "full":      unrestricted covariance matrix per state (needs enough
    #                data to estimate)
    model = GaussianHMM(n_components=6, covariance_type="diag",
                        n_iter=1000)  # diagonal covariance matrix
    model.fit(X)
    # Hidden state for every day; decoded once and reused for the plot.
    hidden_states = model.predict(X)
    print("隐含状态为: ", hidden_states)
    print("特征数目 %s" % model.n_features)
    print("隐状态数目 %s" % model.n_components)
    print("起始概率 :", model.startprob_)
    print("隐状态转移矩阵", model.transmat_)
    # Emissions of each hidden state are assumed Gaussian, so means_ has
    # n_components rows and n_features columns.
    print("混淆矩阵:均值部分", model.means_)
    print("混淆矩阵:方差部分", model.covars_)

    # Plot the close index, one marker series per hidden state.
    tradeDate = df.iloc[:, 5].values
    closeIndex = df.iloc[:, 6].values
    plt.figure(figsize=(15, 8))
    for i in range(model.n_components):
        idx = (hidden_states == i)
        plt.plot_date(pd.to_datetime(tradeDate[idx]),
                      closeIndex[idx],
                      '.',
                      label='%dth hidden state' % i,
                      lw=1)
    plt.legend()
    plt.grid(1)
    plt.show()

    # Part 2: the model is known -- solve problems 1 and 2.

    # Reuse the model fitted above.
    # Problem 1: likelihood of a single observation.  score() needs a 2-D
    # array, so take the first row as a one-row slice rather than X[0].
    print("某天出现该观测的概率为: %s" % np.exp(model.score(X[0:1])))
    # Problem 2: most likely state sequence for the first ten days.
    log_prob, state = model.decode(X[:10], algorithm="viterbi")
    print("只根据前十天,推断出最有可能的隐含状态序列为:", state)

    # Supply the model parameters by hand: 2 features, 4 hidden states.
    startprob = np.array([0.6, 0.3, 0.1, 0.0])
    # The transition matrix, note that there are no transitions possible
    # between component 1 and 3
    transmat = np.array([[0.7, 0.2, 0.0, 0.1], [0.3, 0.5, 0.2, 0.0],
                         [0.0, 0.3, 0.5, 0.2], [0.2, 0.0, 0.2, 0.6]])
    # The means of each component
    means = np.array([[0.0, 0.0], [0.0, 11.0], [9.0, 10.0], [11.0, -1.0]])
    # The covariance of each component
    covars = .5 * np.tile(np.identity(2), (4, 1, 1))
    model2 = GaussianHMM(n_components=4, covariance_type="full", n_iter=1000)
    model2.startprob_ = startprob
    model2.transmat_ = transmat
    model2.means_ = means
    model2.covars_ = covars
# Daily change of the close value.  Differencing drops the first sample, so
# dates and close values are trimmed by one afterwards.
diff = np.diff(close_v)
dates, close_v = dates[1:], close_v[1:]

# Training matrix: one row per day, columns are (diff, volume).
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end="")

# Build a 4-state diagonal-covariance HMM, fit it, then decode the most
# likely hidden state for every row of X.
model = GaussianHMM(n_components=4, covariance_type="diag", n_iter=1000)
model = model.fit(X)
hidden_states = model.predict(X)

print("done")

###############################################################################
# Print trained parameters and plot
print("Transition matrix")
print(model.transmat_)
print()

print("Means and vars of each hidden state")
for i in range(model.n_components):
    state_header = "{0}th hidden state".format(i)
    state_variance = np.diag(model.covars_[i])
    print(state_header)
    print("mean = ", model.means_[i])
    print("var = ", state_variance)
    print()
Beispiel #33
0
# 08-09
# 09-10
# 10-11

# 12-13
# 13-14
# 14-15

# Desired number of states.
Nc = 3

# Train the HMM on the wind series (reshaped to a single column) and
# estimate the sequence of most likely states.
wind_leap = wind.reshape(-1, 1)
model = GaussianHMM(n_components=Nc, covariance_type="diag", n_iter=1000)
model.fit(wind_leap)
hidden_states = model.predict(wind_leap)

# State matrix with 27 rows of 120 states, where each row is one year of
# states; labels are shifted to start at 1.
state_matrix = np.reshape(hidden_states, (27, 120)) + 1

# Swap labels 1 and 3.  Both masks are taken before either assignment so
# the second assignment does not undo the first.
was_one = state_matrix == 1
was_three = state_matrix == 3
state_matrix[was_one] = 3
state_matrix[was_three] = 1

# Dos estados
if Nc == 2:
def runHmm(patient_record,date_list,group_id,empirical_states):
    """Fit a two-state Gaussian HMM to a patient's records and pad with a forecast.

    Args:
        patient_record: Mapping from date to a mapping of 20 values; the
            values are used in the inner mapping's iteration order.
        date_list: Dates to use, in row order; each must be a key of
            ``patient_record``.
        group_id: Sets the output length, ``(group_id + 1) * 10``.
        empirical_states: Reference 0/1 labels compared element-wise with the
            decoded states; used only to decide whether the decoded labels
            should be flipped.

    Returns:
        list: the decoded (possibly flipped) states followed by the forecast
        state repeated until the list has ``(group_id + 1) * 10`` entries;
        nothing is appended if it is already that long.
    """
    max_state_number = (group_id + 1) * 10

    # One row of 20 values per date.  The matrix always has at least two
    # rows, so with zero or one record the missing rows stay all-zero.
    # NOTE(review): rows are sized by len(patient_record) but filled from
    # date_list -- confirm date_list is never longer.
    X = np.zeros(shape=(max(len(patient_record), 2), 20))
    for index, date in enumerate(date_list):
        # .values() works on both Python 2 and 3 (unlike .iteritems()).
        X[index] = np.array(list(patient_record[date].values()))

    print("fitting to HMM and decoding ...")
    n_components = 2
    n_fits = 1  # number of fits; the best-scoring decode is kept

    # make an HMM instance
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    best_score = None
    best_states = None
    transmat = None
    for _ in range(n_fits):
        # Pass the array itself: a list-wrapped X is a 3-D input, which
        # current hmmlearn rejects.
        model.fit(X)
        # decode() returns both the log probability and the state sequence,
        # so a separate predict() call is not needed.
        score, states = model.decode(X)
        if best_score is None or best_score < score:
            best_score = score
            best_states = states
            transmat = model.transmat_

    # HMM state labels are arbitrary, so compare both labelings with the
    # empirical states and keep the one with the smaller squared difference.
    decoded = best_states.tolist()
    flipped = [0 if s == 1 else 1 for s in decoded]
    reference = np.array(empirical_states)
    difference = np.sum(np.power(np.array(decoded) - reference, 2))
    difference_inver = np.sum(np.power(np.array(flipped) - reference, 2))
    if difference_inver < difference:
        decoded = flipped

    # Predict future state.
    # NOTE(review): this always uses the transition row of state 1,
    # regardless of the last decoded state or of the flip above -- confirm.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 1 if future_states_proba[1] > future_states_proba[0] else 0

    padding = max(0, max_state_number - len(decoded))
    return decoded + [future_state] * padding
Beispiel #35
0
                            alpha=0.75)

plt.show()

# Observation sequences matrix
A = np.column_stack([logDel, logRet_5, logVol_5])

# Rescaled observation sequences matrix
rescaled_A = np.column_stack(
    [rescaled_boxcox_logDel, rescaled_logRet_5, rescaled_logVol_5])

# HMM modeling based on raw observation sequences.  A is passed as-is:
# wrapping it in a list produces a 3-D input, which current hmmlearn rejects.
model = GaussianHMM(n_components=3, covariance_type="full",
                    n_iter=2000).fit(A)
hidden_states = model.predict(A)

# Plot the hidden states: one marker series per state.
plt.figure(figsize=(25, 18))
for i in range(model.n_components):
    pos = (hidden_states == i)
    plt.plot_date(Date[pos],
                  close[pos],
                  'o',
                  label='hidden state %d' % i,
                  lw=2)
# "left" is not a valid legend location; "best" is what matplotlib used to
# fall back to for unrecognised values.
plt.legend(loc="best")

# Trading test according to the hidden states
for i in range(3):
         fillValue = 30.0
     elif parameter == 'Length':
         fillValue = 325.0
     else:
         fillValue = 0
     if (parameter + '_smoothed') not in fbf.columns:
         fbf[parameter] = fbf[parameter].fillna(method='pad', limit=5).fillna(fillValue)
         fbf = smooth(fbf, parameter)
         fbf.to_pickle(directory + '/frame_by_frame_synced.pickle')
 
 #CREATE HIDDEN MARKOV MODEL
 
 _fbf = fbf.loc[fbf['synced_time'] > np.timedelta64(0,'ns')]  #take only post-stimulus data
 X = np.column_stack(_fbf[ i +'_smoothed'] for i in parameters)
 
 state_values = pd.DataFrame(THE_model.predict(X), columns=['state'])
 #DISCARD CASES WHERE ONE OR MORE STATES OCCURS RARELY (<1%).
 DISCARD = False
 for i in list(set(state_values['state'])):
     if (len(state_values[state_values['state']==i]) / float(len(state_values)) < 0.005) & (len(state_values[state_values['state']==i]) >0):
         print i, len(state_values), len(state_values[state_values['state'] == i]), '\t', FLY_ID
         state_values.loc[state_values['state']==i, 'state'] = np.nan
         #DISCARD = True
 state_values['state'] = state_values['state'].fillna(method='pad').fillna(method='bfill')
 state_values = np.array(state_values['state']) 
 
 statesdf = pd.DataFrame(state_values, columns=['state'], index = _fbf.index)
 statesdf['FLY_ID'] = FLY_ID
 try:
     statesdf['GROUP'] = GROUP
     statesdf.to_pickle(directory + '/states.pickle')
Beispiel #37
0
def predict_states(X, group_id, empirical_states):
    """Decode a two-state Gaussian HMM on X and pad the result with a forecast.

    Args:
        X: 2-D observation array passed to ``GaussianHMM.fit`` / ``decode``.
        group_id: Sets the output length, ``(group_id + 1) * 10``.
        empirical_states: Reference 0/1 labels compared element-wise with the
            decoded states; used only to decide whether the decoded labels
            should be flipped.

    Returns:
        list: the decoded (possibly flipped) states followed by the forecast
        state repeated until the list has ``(group_id + 1) * 10`` entries;
        nothing is appended if it is already that long.
    """
    max_state_number = (group_id + 1) * 10
    n_components = 2
    n_fits = 1  # number of fits; the best-scoring decode is kept

    # make an HMM instance
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

    best_score = None
    best_states = None
    transmat = None
    for _ in range(n_fits):
        # Pass the array itself: a list-wrapped X is a 3-D input, which
        # current hmmlearn rejects.
        model.fit(X)
        # decode() returns both the log probability and the state sequence,
        # so a separate predict() call is not needed.
        score, states = model.decode(X)
        if best_score is None or best_score < score:
            best_score = score
            best_states = states
            transmat = model.transmat_

    # HMM state labels are arbitrary, so compare both labelings with the
    # empirical states and keep the one with the smaller squared difference.
    decoded = best_states.tolist()
    flipped = [0 if s == 1 else 1 for s in decoded]
    reference = np.array(empirical_states)
    difference = np.sum(np.power(np.array(decoded) - reference, 2))
    difference_inver = np.sum(np.power(np.array(flipped) - reference, 2))
    if difference_inver < difference:
        decoded = flipped

    # Predict future state.
    # NOTE(review): this always uses the transition row of state 1,
    # regardless of the last decoded state or of the flip above -- confirm.
    future_states_proba = np.dot([0, 1], transmat)
    future_state = 1 if future_states_proba[1] > future_states_proba[0] else 0

    padding = max(0, max_state_number - len(decoded))
    return decoded + [future_state] * padding
Beispiel #38
0
dim_h = 5        # number of hidden states
N_train = 500    # number of leading rows used for training
n_stocks = 1
# First N_train rows, three columns per stock.
X = in_data[:N_train, :(n_stocks * 3)]
# Columns per stock.  Floor division keeps this an int on Python 3; it is
# multiplied into an index array further down, and float indices are invalid.
n_factors = X.shape[1] // n_stocks

# Make an HMM instance and execute fit on the first N_train rows of
# in_data_ema.
model = GaussianHMM(n_components=dim_h, covariance_type="diag",
                    n_iter=1000).fit(in_data_ema[:N_train, :])

# Per-sample result buffers for the training rows.
RMSE_train = np.zeros(N_train)
ER_train = np.zeros(N_train)

# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(in_data_ema[:N_train, :])
	state_cur = hidden_states[i]
	# model.transmat_
	pred_ind = np.arange(n_stocks) * n_factors
	
	mean_cur = model.means_[state_cur,:]
	mean_pred = mean_cur[pred_ind]
	# need 
	prev_ema = in_data_ema[i,pred_ind]
	mean_pred = rm_ema(mean_pred, prev_ema, n_ema=n_ema)
	
	covar_cur = model.covars_[state_cur,:]
	covar_pred = covar_cur[pred_ind,:][:,pred_ind]
	covar_pred = rm_ema(covar_pred, 0, n_ema=n_ema)

	y_true = in_data[(i+1),pred_ind]
Beispiel #39
0
plt.show()
'''for i in range(3,30):
    km = GaussianMixture(n_components = i, covariance_type = 'diag').fit(dt1)
    bc.append(km.bic(dt1))
    
plt.plot(bc)
plt.show()


vec = km.predict(dt1)

plt.scatter(dt[0,:], dt[1,:], c=vec)
plt.show()

print km.bic(dt1)'''

#%%
#y1 = dt[:,700:1500]
#y2 = dt[:,1600:2000]
#y3 =np.append(y1,y2, axis=1)
# Columns 220..269 of dt, transposed so that each of those columns becomes
# one observation row.
y3 = dt[:, 220:270]
train_obs = np.transpose(y3)

md = GaussianHMM(n_components=7, n_iter=100).fit(train_obs)

# Log-likelihood of the training slice.  A parenthesised single-argument
# print behaves the same on Python 2 and 3.
print(md.score(train_obs))
# NOTE(review): states are decoded for dt1, not for the slice the model was
# trained on -- confirm this is intended.
plt.plot(md.predict(dt1))
plt.show()

#joblib.dump(md, "Clasificadores/md7.pkl")
Beispiel #40
0
# DataFrame.as_matrix() was removed from pandas; .values gives the same
# NumPy array on old and new versions.
data_label = activity_data.values

test_feature = feature_test.values
test_label = activity_test.values

# Number of training samples (data_feature is defined before this snippet).
lengths = data_feature.shape[0]

# --- Run Gaussian HMM --- #
print("fitting to HMM and decoding ...")

# --- Make an HMM instance and execute fit --- #
model = GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000).fit(data_feature)

# --- Predict the optimal sequence of internal hidden state FOR DATA CSV!--- #
# --- the following is generating figure #1, and it predicts state sequence from DATA csv --- #
hidden_states = model.predict(data_feature)

# Sample index 0..n-1, used as the x axis.
time_axis = np.arange(len(hidden_states))

# --- fancy plots of different states in HMM --- #
fig1_data, axs = plt.subplots(model.n_components, sharex=True, sharey=True)
fig1_data.suptitle('Estimated State Sequence for Training Data')
colours = cm.rainbow(np.linspace(0, 1, model.n_components))
for i, (ax, colour) in enumerate(zip(axs, colours)):
    # --- Use fancy indexing to plot data in each state --- #
    # Column 1 of data_feature at the samples decoded as state i.
    mask = hidden_states == i
    ax.plot(time_axis[mask], data_feature[:, 1][mask], ".", c=colour)
    ax.set_title("{0}th hidden state".format(i))
    ax.grid(True)

# --- the following is generating figure #2, and it plots actual label sequence from DATA csv --- #
Beispiel #41
0
from hmmlearn.hmm import GaussianHMM
from matplotlib import cm, pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
import numpy as np
import pandas as pd
import seaborn as sns

# API client; IS_TEST, API_KEY and API_SECRET are defined outside this
# snippet.
client = bitmex.bitmex(test=IS_TEST, api_key=API_KEY, api_secret=API_SECRET)

# Request 1000 daily XBTUSD buckets with reverse=True and load the first
# element of the result into a frame.
prices = pd.DataFrame(
    client.Trade.Trade_getBucketed(
        binSize='1d',
        symbol='XBTUSD',
        count=1000,
        reverse=True,
    ).result()[0])

# Index by timestamp and order the rows by ascending timestamp.
prices.set_index(['timestamp'], inplace=True)
prices = prices.sort_values(by='timestamp', ascending=True)

# Single-column matrix of close-to-close percentage changes; the first row
# (which has no previous close) is set to zero.
rets = np.column_stack([prices['close'].pct_change()])
rets[0] = 0

# Two-state, full-covariance HMM fitted on the returns.
hmm_model = GaussianHMM(n_components=2, covariance_type="full", n_iter=10)
hmm_model = hmm_model.fit(rets)

print("Model Score:", hmm_model.score(rets))

# Bare expressions: their values are discarded when this runs as a script.
hmm_model.predict(rets)

hmm_model.predict(rets2)[-1]