def predictWithHMM(index, window=252):
    training_X = X[range(index - window, index), :]
    training_y = actual_y[range(index - window, index)]
    testing_X = X[index, :].reshape(1, training_X.shape[1])
    testing_y = y[index]

    # PCA DATA
    if perform_pca:
        pca = PCA(n_components=pca_components)
        pca.fit(training_X)
        training_X = pca.transform(training_X)
        testing_X = pca.transform(testing_X)

    model = GaussianHMM(n_components, "diag", n_iter=1000)
    model.fit([training_X])

    hidden_states = model.predict(training_X)
    predicted_hidden_state = model.predict(testing_X)

    # DO PROBABILISTIC APPROACH
    # pr = model.predict_proba(testing_X)
    # print pr

    prob = 0
    state_idx = (hidden_states == predicted_hidden_state)
    median_val = np.mean(training_y[state_idx])
    return int(median_val > 0), testing_y, prob
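# Added sketch, not part of the original snippet: the examples in this collection use
# the older sklearn.hmm-style API, where fit() took a list of sequences. A minimal,
# self-contained equivalent with the current `hmmlearn` package (assumed installed;
# all names below are illustrative) looks like this:
import numpy as np
from hmmlearn.hmm import GaussianHMM

rng = np.random.RandomState(0)
X_demo = rng.randn(500, 2)                                   # synthetic 2-feature observations
model_demo = GaussianHMM(n_components=3, covariance_type="diag", n_iter=1000)
model_demo.fit(X_demo)                                       # a single 2-D array, not a list
states_demo = model_demo.predict(X_demo)                     # Viterbi state sequence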
def train(X, n_components):
    ###########################################################################
    # Run Gaussian HMM
    print("fitting to HMM and decoding ...")

    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=2000)
    model.fit([X])

    # predict the optimal sequence of internal hidden state
    hidden_states = model.predict(X)
    print("done\n")

    ###########################################################################
    # print trained parameters and plot
    print("Transition matrix")
    print(model.transmat_)
    print()

    print("means and vars of each hidden state")
    for i in range(n_components):
        print("%dth hidden state" % i)
        print("mean = ", model.means_[i])
        print("var = ", np.diag(model.covars_[i]))
        print()

    return hidden_states, model
def use_hmm(img_times, change_vals, fps=10, min_secs_for_train_to_pass=8):
    from sklearn.hmm import GaussianHMM
    X = np.column_stack(change_vals)
    n_components = 2
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
    model.fit([X.T])
    #thresh = 10**-15
    #model.transmat_ = np.array([[1-thresh,thresh],[1-thresh,thresh]])
    hidden_states = model.predict(X.T)

    # print trained parameters and plot
    print("Transition matrix")
    print(model.transmat_)
    print()

    print("means and vars of each hidden state")
    for i in range(n_components):
        print("%dth hidden state" % i)
        print("mean = ", model.means_[i])
        print("var = ", np.diag(model.covars_[i]))
        print()

    if model.means_[0][0] > model.means_[1][0]:
        # assume most frames have no train; switch labels if necessary
        hidden_states = 1 - hidden_states

    train_spotted = filter_out_short_motions(hidden_states, min_secs_for_train_to_pass, fps)

    plot_timeline(img_times, change_vals, hidden_states, train_spotted)
    utils.copy_image_subset(config.experiment_data_frames,
                            config.experiment_output_frames_hmm,
                            np.nonzero(train_spotted)[0])
    return train_spotted
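# Added sketch (hypothetical helper, not in the original source): the two-state
# mean comparison above generalizes to more states by relabelling states in order of
# their fitted means, so that state 0 is always the lowest-mean regime.
import numpy as np

def relabel_states_by_mean(means, hidden_states):
    """Return hidden_states relabelled so state k has the k-th smallest mean on feature 0."""
    order = np.argsort(means[:, 0])      # original state indices sorted by their mean
    rank = np.empty_like(order)
    rank[order] = np.arange(len(order))  # rank of each original state
    return rank[hidden_states]

# e.g., with the model above: hidden_states = relabel_states_by_mean(model.means_, hidden_states)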
def get_hmms(self):
    for gesture_type in self.gesture_types:
        print_status("Get_Hmms", "Fitting for gesture_type: " + gesture_type)

        ### Step 1: fill hmm_examples appropriately ###
        hmm_examples = []
        for gesture in self.gestures[gesture_type]:
            hmm_rep = gesture.get_hmm_rep()
            hmm_examples.append(hmm_rep)

        ### Step 2: fit parameters for the hmm ###
        hmm = GaussianHMM(self.num_hmm_states)
        hmm.fit(hmm_examples)

        ### Step 3: store the hmm in self.hmms ###
        self.hmms[gesture_type] = hmm

        print_inner_status(gesture_type, "predicted the following sequences: (score: sequence)")
        for example in hmm_examples:
            print " ", hmm.score(example), ": ", hmm.predict(example)
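# Added sketch, assuming the current `hmmlearn` package (all names are illustrative):
# where the old API above accepts a plain list of example sequences, the modern fit()
# call takes the sequences concatenated into one array plus their lengths.
import numpy as np
from hmmlearn.hmm import GaussianHMM

example_seqs = [np.random.randn(80, 4), np.random.randn(120, 4)]   # two toy sequences
X_all = np.concatenate(example_seqs)
lengths = [len(seq) for seq in example_seqs]
hmm_demo = GaussianHMM(n_components=3, covariance_type="diag", n_iter=10)
hmm_demo.fit(X_all, lengths)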
def predict(self, obs):
    """Find most likely state sequence corresponding to `obs`.

    Parameters
    ----------
    obs : np.ndarray, shape=(n_samples, n_features)
        Sequence of n_features-dimensional data points. Each row
        corresponds to a single point in the sequence.

    Returns
    -------
    hidden_states : np.ndarray, shape=(n_states)
        Index of the most likely states for each observation
    """
    _, vl = scipy.linalg.eig(self.transmat_, left=True, right=False)
    startprob = vl[:, 0] / np.sum(vl[:, 0])

    model = GaussianHMM(n_components=self.n_states, covariance_type='full')
    model.startprob_ = startprob
    model.transmat_ = self.transmat_
    model.means_ = self.means_
    model.covars_ = self.covars_

    return model.predict(obs)
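# Side note (added sketch, not from the original source): the eigendecomposition in
# predict() extracts the chain's stationary distribution, i.e. the left eigenvector of
# transmat_ with eigenvalue 1, normalised to sum to 1. Selecting the eigenvector by its
# eigenvalue (rather than assuming it sits in column 0) is the more robust variant:
import numpy as np
import scipy.linalg

transmat_demo = np.array([[0.9, 0.1],
                          [0.3, 0.7]])
w, vl = scipy.linalg.eig(transmat_demo, left=True, right=False)
pi = np.real(vl[:, np.argmin(np.abs(w - 1.0))])   # left eigenvector for eigenvalue ~1
pi = pi / pi.sum()
print(np.allclose(pi @ transmat_demo, pi))        # True: pi P = pi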
def gaussian_hmm_model(stock_market_quote, n_components=5):
    close_v = np.asarray(stock_market_quote.get_closing_price())
    volume = np.asanyarray(stock_market_quote.get_volume())
    volume = volume[:-1]

    diff = close_v[1:] - close_v[:-1]
    close_v = close_v[1:]

    X = np.column_stack([diff, volume])
    model = GaussianHMM(n_components, covariance_type="diag")
    model.fit([X])
    hidden_states = model.predict(X)

    print "Transition matrix"
    print model.transmat_
    print ""

    print "means and vars of each hidden state"
    for i in xrange(n_components):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""

    '''Visualization of Closing Price with respect to Volume, clustered by
    hidden states of data
    '''
    fig = mlp.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_components):
        idx = (hidden_states == i)
        ax.plot(volume[idx], close_v[idx], 'o', label="%dth hidden state" % i)
    ax.legend()
    ax.set_xlabel('Volume of Stock', fontsize=20)
    ax.set_ylabel('Closing Price of Stock', fontsize=20)
    ax.set_title("""Quote's Volume and closing volume change in different
    hidden states""")
    ax.grid(True)
    mlp.show()
data = t1dmread('trimmedDataFiles/MYFILE101.no_gaps_trimmed.csv')

timeStamps101 = np.array(data['timestamp'])
skinTemp101 = np.array(data['skin temp'])
airTemp101 = np.array(data['air temp'])
steps101 = np.array(data['steps'])
hr101 = np.array(data['hr'])
cgm101 = np.array(data['cgm'])

normskintemp101 = skinTemp101 - airTemp101
toFit = np.column_stack([cgm101, normskintemp101])

print("Fitting to HMM")
n_components = 4
model = GaussianHMM(n_components, covariance_type='diag', n_iter=1000)
model.fit([toFit])
hidden_states = model.predict(toFit)
print("done\n")

print("Transition Matrix for Normed Skin Temperature")
print(model.transmat_)

print("\nMeans and variances of each hidden state: \n")
for i in range(n_components):
    print("%dth hidden state:" % i)
    print("Mean = ", model.means_[i])
    print("Variance = ", np.diag(model.covars_[i]))
    print()

fig = pl.figure()
skinTemp = fig.add_subplot(211)
counts = np.reshape(np.log1p(np.array(counts, dtype="int")), (-1, 1))
hmm = GaussianHMM(len(args.mu))
hmm.fit([counts])

if args.verbose:
    sys.stderr.write("Fitting HMM to k-mer counts, assuming {} hidden states...\n".format(len(args.mu)))
    sys.stderr.write("means:\n" + str(hmm.means_) + "\n")
    sys.stderr.write("covariances:\n" + str(hmm.covars_) + "\n")
    sys.stderr.write("\n")
    sys.stderr.write("Processing possible variant sites...\n")
    sys.stderr.write("\trejecting haplotypes with read count < {}\n".format(args.maf))
    sys.stderr.write("\taccepting as TE/ME any haplotype with max count > {}\n".format(args.maxhits))

## find positions of transitions
states = hmm.predict(counts)
breaks = np.where(np.diff(states))[0]
break_coords = []
break_kmers = []
break_counts = []
for j in range(0, breaks.shape[0] / 2):
    i_start = breaks[2 * j]
    i_end = breaks[2 * j + 1] + 1
    break_coords.append((kmer_stash[i_start].chrom, kmer_stash[i_start].start, kmer_stash[i_end].start))
    break_kmers.append((kmer_stash[i_start].name, kmer_stash[i_end].name))
    break_counts.append((kmer_stash[i_start].score, kmer_stash[i_end].score))

for i in range(0, len(break_coords)):
    hap = assemble_inward(msbwt[0], break_kmers[i][0], break_kmers[i][1])
    if args.verbose:
        outline = [break_coords[i][0], break_coords[i][1], break_counts[i][0],
                   break_coords[i][2], break_counts[i][1]]
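# Added micro-example of the breakpoint logic above (self-contained, illustrative
# values only): np.diff(states) is nonzero exactly where the decoded state changes,
# so np.where(...)[0] returns the index just before each transition.
import numpy as np

states_demo = np.array([0, 0, 1, 1, 1, 0, 0])
print(np.where(np.diff(states_demo))[0])   # [1 4]: state changes after indices 1 and 4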
lenData = len(timeStamps)
lenTrain = np.ceil(0.8 * lenData)

training = np.column_stack([normskintemp[0:lenTrain], numlabels[0:lenTrain]])
test = np.column_stack([normskintemp[(lenTrain + 1):lenData], numlabels[(lenTrain + 1):lenData]])
test_timeStamps = timeStamps[lenTrain + 1:lenData]
test_st = normskintemp[int(lenTrain) + 1:lenData]
test_cgm = cgm[int(lenTrain) + 1:lenData]

# "Official" labels for test set data
test_labels = numlabels[int(lenTrain) + 1:lenData]

n_components = 3  # Rising, falling, and stable blood sugar
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model.fit([training])
hidden_states_training = model.predict(training)

print("Transition Matrix:\n")
print(model.transmat_)
for i in range(n_components):
    print("Hidden state %d:" % i)
    print("Mean = ", model.means_[i])
    print("Variance = ", np.diag(model.covars_[i]))
    print()

hidden_states_test = model.predict(test)
print("Test Set Hidden States")
test_results = np.empty_like(hidden_states_test, dtype="S10")
state_contents = np.empty_like(hidden_states_test)
n_features = X_new.shape[1]
print(n_features)

# # clf = svm.SVC()
# # clf.fit(X_new, y)
# hmm = MultinomialHMM()
# pos = np.where(np.diff(y) != 0)[0]
# d = np.hstack([0, pos+1, len(y)])
# lens = np.diff(d)
# hmm.fit(X_new, y, lens)

hmm = GaussianHMM(n_components=20)
hmm.fit([X_new])
clusters = pred = hmm.predict(X_new)

# neigh = KNeighborsClassifier(n_neighbors=10, weights='distance')
# scores = cross_validation.cross_val_score(neigh, X_new, y, cv=5)
# print(scores)
# # neigh.fit(X_new, y)
# good_features = ETC.feature_importances_ >= 0.0005
# print(np.sum(good_features))
# X_new2 = X[..., good_features]
# n_features = 20
# pca = PCA(n_components = n_features)
# pca.fit(X_new)
# print(pca.explained_variance_)
# X_new = pca.transform(X_new)
class GaussianHmmLib:
    """
    ref: http://scikit-learn.org/0.14/auto_examples/applications/plot_hmm_stock_analysis.html
    https://www.quantopian.com/posts/inferring-latent-states-using-a-gaussian-hidden-markov-model

    bear market: smaller mean, higher variance
    bull market: higher mean, smaller variance
    """

    def __init__(self, dbhandler, *args, **kwargs):
        self.dbhandler = dbhandler
        self.sids = self.dbhandler.stock.ids
        self.n_components = int(kwargs.pop('n_components')) or 5
        self.n_iter = int(kwargs.pop('n_iter')) or 1000

    def run(self, data):
        sid = self.sids[0]
        self.dates = data[sid]['price'].values
        self.close_v = data[sid]['close_v'].values
        self.volume = data[sid]['volume'].values[1:]

        # take diff of close value
        # this makes len(diff) = len(close_t) - 1
        # therefore, other quantities also need to be shifted
        self.diff = self.close_v[1:] - self.close_v[:-1]

        # pack diff and volume for training
        self.X = np.column_stack([self.diff, self.volume])

        # make an HMM instance and execute fit
        self.model = GaussianHMM(self.n_components, covariance_type="diag", n_iter=self.n_iter)
        self.model.fit([self.X], n_iter=self.n_iter)

        # predict the optimal sequence of internal hidden state
        self.hidden_states = self.model.predict(self.X)

    def report(self):
        # print trained parameters and plot
        print "Transition matrix"
        print self.model.transmat_
        print ""

        print "means and vars of each hidden state"
        for i in xrange(self.n_components):
            print "%dth hidden state" % i
            print "mean = ", self.model.means_[i]
            print "var = ", np.diag(self.model.covars_[i])
            print ""

        years = YearLocator()    # every year
        months = MonthLocator()  # every month
        yearsFmt = DateFormatter('%Y')

        fig = plt.figure()
        ax = fig.add_subplot(111)

        for i in xrange(self.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot_date(self.dates[idx], self.close_v[idx], 'o', label="%dth hidden state" % i)
        ax.legend()

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(yearsFmt)
        ax.xaxis.set_minor_locator(months)
        ax.autoscale_view()

        # format the coords message box
        ax.fmt_xdata = DateFormatter('%Y-%m-%d')
        ax.fmt_ydata = lambda x: '$%1.2f' % x
        ax.grid(True)

        fig.autofmt_xdate()
        plt.savefig("gaussianhmm_%s.png" % (self.sids[0]))
model = GaussianHMM(algorithm='viterbi', covariance_type='full',
                    covars_prior=0.01, covars_weight=1,
                    means_prior=None, means_weight=0,
                    n_components=5, random_state=None,
                    startprob=None, startprob_prior=1.0,
                    transmat=None, transmat_prior=1.0)

print "Fitting model..."
model.fit([farm1.get_output()], n_iter=1000)

print "Predicting hidden states..."
hidden_states = model.predict(farm1.get_output())

print "Transition matrix"
print model.transmat_
print ""

print "mean and vars of the hidden states"
for i in range(5):
    print "%dth hidden state" % i
    print "mean = ", model.means_[i]
    print "var = ", np.diag(model.covars_[i])
    print ""
HMM = GaussianHMM(n_components=5, covariance_type="diag", transmat=transitions_prob)

#
# Must always fit the obs data before changing means and covars
#
HMM.fit([Resul])
HMM.means_ = np.identity(5)
HMM.covars_ = 0.2 * np.ones((5, 5))

# Use of LR probability to predict the states.
HResul = HMM.predict(Resul)

# Get the probability of success of the HMM
Hscore = comp(HResul, target)

# print HResul
print "HMM = "
print Hscore

## Here is just for writing the results inside a CSV file
########################################################################################
# target = np.where(target == 0, 'Lying')
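# Added sketch (an assumption about the current `hmmlearn` API, not taken from the
# original snippet): rather than overwriting means_/covars_ after fit(), the usual way
# to keep manually chosen emission parameters is to drop 'm' and 'c' from init_params
# and params, so EM neither re-initialises nor re-estimates them.
import numpy as np
from hmmlearn.hmm import GaussianHMM

obs_demo = np.random.RandomState(1).randn(200, 5)
hmm_fixed = GaussianHMM(n_components=5, covariance_type="diag",
                        init_params="st", params="st")   # learn only startprob/transmat
hmm_fixed.means_ = np.identity(5)                        # pinned means
hmm_fixed.covars_ = 0.2 * np.ones((5, 5))                # pinned diagonal covariances
hmm_fixed.fit(obs_demo)
states_fixed = hmm_fixed.predict(obs_demo)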
volume = []
labels = []
for row in data:
    if row[1] != 'close':
        list = []
        volume.append(float(row[2]))
        #for i in range(len(row)-2):
        #    list.append(int(float(row[i+1])*1000))
        label = int(float(row[7]) * 100)
        labels.append(label)
        #matrix.append(list)

"""
Building the HMM
"""
X = numpy.column_stack([numpy.array(labels), numpy.array(volume)])
classes = model.predict(X)

"""
calculating the algorithm performance
"""
result = 0
correct = 0
correctTens = 0
totalTens = 0
trades = 0
for i in range(len(classes)):
    print classes[i], labels[i]
    if (classes[i] == 0 and labels[i] == -10) or (classes[i] == 1 and labels[i] == 10):
        correct += 1
    if classes[i] == 1 or classes[i] == 0 or labels == 1 or labels == 0:
        totalTens += 1
class HMM(object):
    '''
    class for creating and manipulating HMM model
    '''

    def __init__(self, **kwargs):
        if 'steam_obj' not in kwargs:
            self.steam_obj = Steam()
        else:
            self.steam_obj = kwargs['steam_obj']
        if 'weather_obj' not in kwargs:
            self.weather_obj = Weather()
        else:
            self.weather_obj = kwargs['weather_obj']
        steam_obj = self.steam_obj
        weather_obj = self.weather_obj
        hour_of_day = steam_obj.ts.index.map(lambda x: x.hour + (x.minute / 60.0))
        day_of_week = steam_obj.ts.index.map(lambda x: x.dayofweek)
        df_hmm = pd.DataFrame({'steam': steam_obj.ts, 'weather': weather_obj.ts,
                               'hour_of_day': hour_of_day, 'day_of_week': day_of_week},
                              index=steam_obj.ts.index)
        # it is important that the column order is maintained
        # while slicing the HMM model
        self.df_hmm, self.X_hmm = self.gen_meta_data(steam_obj, weather_obj)
        if 'n_states' not in kwargs:
            self.plot_elbow(3, 15)
        else:
            self.n_states = kwargs['n_states']

    def __len__(self):
        return len(self.X_hmm)

    def build_model(self):
        n_states = self.n_states
        X_hmm = self.X_hmm
        self.model = GaussianHMM(n_states, covariance_type='diag', n_iter=1000)
        self.model.fit([X_hmm])
        self.hidden_states = self.model.predict(X_hmm)

    def build_forecast_model(self):
        model = self.model
        n_states = self.n_states
        model_forecast = copy.deepcopy(model)
        model_forecast.n_features = model.n_features - 1
        model_forecast._means_ = model.means_[:, 1:]
        model_forecast._covars_ = model._covars_[:, 1:]
        self.model_forecast = model_forecast

    def gen_meta_data(self, steam_obj=None, weather_obj=None):
        if steam_obj != None:
            hour_of_day = steam_obj.ts.index.map(lambda x: x.hour + (x.minute / 60.0))
            day_of_week = steam_obj.ts.index.map(lambda x: x.dayofweek)
            df_hmm = pd.DataFrame({'steam': steam_obj.ts, 'weather': weather_obj.ts,
                                   'hour_of_day': hour_of_day},
                                  index=steam_obj.ts.index)
            #df_hmm = pd.DataFrame({'steam': steam_obj.ts, 'weather': weather_obj.ts,
            #                       'hour_of_day': hour_of_day, 'day_of_week': day_of_week},
            #                      index=steam_obj.ts.index)
            #X_hmm = df_hmm.as_matrix(columns=['steam', 'weather'])
            X_hmm = df_hmm.as_matrix(columns=['steam', 'weather', 'hour_of_day'])
            #X_hmm = df_hmm.as_matrix(columns=['steam', 'weather', 'hour_of_day', 'day_of_week'])
        else:
            hour_of_day = weather_obj.ts.index.map(lambda x: x.hour + (x.minute / 60.0))
            day_of_week = weather_obj.ts.index.map(lambda x: x.dayofweek)
            df_hmm = pd.DataFrame({'weather': weather_obj.ts,
                                   'hour_of_day': hour_of_day},
                                  index=weather_obj.ts.index)
            #df_hmm = pd.DataFrame({'weather': weather_obj.ts,
            #                       'hour_of_day': hour_of_day, 'day_of_week': day_of_week},
            #                      index=weather_obj.ts.index)
            #X_hmm = df_hmm.as_matrix(columns=['weather'])
            X_hmm = df_hmm.as_matrix(columns=['weather', 'hour_of_day'])
            #X_hmm = df_hmm.as_matrix(columns=['weather', 'hour_of_day', 'day_of_week'])
        return df_hmm, X_hmm

    def plot_model(self, x_ax=None, y_ax=None):
        X_hmm = self.X_hmm
        steam_ts = self.steam_obj.ts
        if x_ax == None:
            x_ax = np.asarray([item.to_datetime() for item in steam_ts.index])
        if y_ax == None:
            y_ax = X_hmm[:, 0]
        hidden_states = self.hidden_states
        n_states = self.n_states
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for i in xrange(n_states):
            print i
            idx = (hidden_states == i)
            if i < 7:
                ax.plot(x_ax[idx], y_ax[idx], 'o', label='%dth state' % i)
            elif i < 14:
                ax.plot(x_ax[idx], y_ax[idx], 'x', label='%dth state' % i)
            elif i < 21:
                ax.plot(x_ax[idx], y_ax[idx], '+', label='%dth state' % i)
            elif i < 28:
                ax.plot(x_ax[idx], y_ax[idx], '*', label='%dth state' % i)
        ax.set_title('%d State HMM' % (n_states))
        ax.legend()
        ax.set_ylabel('Load (Mlb/Hr)')
        ax.set_xlabel('Time')
        ax.grid(True)
        plt.show()

    def plot_elbow(self, start, end):
        '''
        Fit GMM and plot elbow using AIC & BIC
        '''
        from sklearn.mixture import GMM, DPGMM
        obs = self.X_hmm
        aics = []
        bics = []
        for i in range(start, end + 1):
            n_iter = 1000
            for j in range(1, 11):
                g = GMM(n_components=i, n_iter=n_iter)
                g.fit(obs)
                print i
                converged = g.converged_
                if converged:
                    print 'j:%d' % (j)
                    break
                n_iter += 1000
            aics.append(g.aic(obs))
            bics.append(g.bic(obs))
        if not converged:
            print 'Not Converged!!'
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(range(start, end + 1), aics, label='AIC')
        ax.plot(range(start, end + 1), bics, label='BIC')
        ax.set_xlabel("No. of Clusters")
        ax.set_ylabel("Information Loss")
        ax.set_xticks(range(start, end + 1), minor=True)
        ax.legend()
        ax.grid(True, which='both')
        plt.show()
algorithm="viterbi", covariance_type="full", covars_prior=0.01, covars_weight=1, means_prior=None, means_weight=0, n_components=5, random_state=None, startprob=None, startprob_prior=1.0, transmat=None, transmat_prior=1.0, ) print "Fitting model..." model.fit([farm1.get_output()], n_iter=1000) print "Predicting hidden states..." hidden_states = model.predict(farm1.get_output()) print "Transition matrix" print model.transmat_ print "" print "mean and vars of the hidden states" for i in range(5): print "%dth hidden state" % i print "mean = ", model.means_[i] print "var = ", np.diag(model.covars_[i]) print ""
print "Training Multivariate Gaussian HMM model..." n_components = 3 model = GaussianHMM(n_components, covariance_type="full", n_iter=10) model.fit(training_sequences) # save Gaussian HMM model to file model_dir = output_dir + '%sstates/' % n_components mkdir_p(model_dir) serialiser = HMMSerialiser(model, feature_names=adaptor.getFeatures()) serialiser.saveXML(model_dir + 'model.xml') print "Tagging observation sequences..." tagged_sequences_dir = model_dir + "tagged_sequences/" mkdir_p(tagged_sequences_dir) for i, training_sequence in enumerate(training_sequences): hidden_state_sequence = model.predict(training_sequence) for j, state in enumerate(hidden_state_sequence): observation_sequences[i].getObservation(j).setState( "H%s" % state ) # save tagged sequence to file filename = filenames[i].replace('.csv', '.tagged.csv') observation_sequences[i].save(tagged_sequences_dir + filename, include_state=True) print "Training GMM-HMM model..." n_components = 3 model = GMMHMM(n_components, n_mix=2, covariance_type="full", n_iter=100) # Multiple Gaussians (GMM) per State and Feature model.fit(training_sequences) # Save GMM HMM model to file serialiser = HMMSerialiser(model, feature_names=adaptor.getFeatures()) serialiser.saveXML(model_dir + 'model_gmm.xml')
# pack diff and volume for training
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print "fitting to HMM and decoding ...",
n_components = 2

# make an HMM instance and execute fit
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)
print "done\n"

###############################################################################
# print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
for i in xrange(n_components):
    print "%dth hidden state" % i
    print "mean = ", model.means_[i]
    print "var = ", np.diag(model.covars_[i])
    print ""
# pack diff and volume for training
X1 = np.column_stack([diff1, volume1])
X2 = np.column_stack([diff2, volume2])

###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end='')
n_components = 5

# make an HMM instance and execute fit
model1 = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model2 = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model1.fit([X1])
model2.fit([X2])

# predict the optimal sequence of internal hidden state
hidden_states1 = model1.predict(X1)
hidden_states2 = model2.predict(X2)
print("done\n")

# calculate similarity measure
states1 = range(n_components)
states2 = list(itertools.permutations(states1))
print(states1)
print(len(states2))
sims = []
for i in range(len(states2)):
    sim = 0
    for j in range(len(hidden_states1)):
        sim += hidden_states1[j] == states2[i][hidden_states2[j]]
        #pdb.set_trace()
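# Added sketch of the idea above (illustrative arrays only): HMM state labels are
# arbitrary, so two independently fitted models are compared over every relabelling of
# one state sequence, keeping the permutation with the highest agreement.
import itertools
import numpy as np

a = np.array([0, 0, 1, 2, 2])
b = np.array([2, 2, 0, 1, 1])           # same segmentation, different labels
best = max(np.mean(a == np.array(p)[b]) for p in itertools.permutations(range(3)))
print(best)                             # 1.0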