Example #1
0
def train(X, n_components):
    """Fit a diagonal-covariance Gaussian HMM to ``X`` and decode it.

    A ``GaussianHMM`` with ``n_components`` states (``n_iter=2000``) is
    fitted on ``[X]`` and then asked for the most likely hidden-state
    sequence of ``X``.  Progress messages, the learned transition matrix
    and the mean/variance entry of every state are printed to stdout.

    Returns
    -------
    tuple
        ``(hidden_states, model)`` -- the decoded state sequence and the
        fitted model.
    """
    print("fitting to HMM and decoding ...")

    hmm_model = GaussianHMM(n_components, covariance_type="diag", n_iter=2000)
    hmm_model.fit([X])
    decoded_states = hmm_model.predict(X)

    print("done\n")

    # Report the learned parameters.
    print("Transition matrix")
    print(hmm_model.transmat_)
    print()

    print("means and vars of each hidden state")
    for state in range(n_components):
        print("%dth hidden state" % state)
        print("mean = ", hmm_model.means_[state])
        print("var = ", np.diag(hmm_model.covars_[state]))
        print()

    return decoded_states, hmm_model
Example #2
0
def use_hmm(img_times, change_vals, fps=10, min_secs_for_train_to_pass=8):
    """Segment change measurements with a 2-state Gaussian HMM.

    ``change_vals`` is column-stacked and transposed into the observation
    matrix, a 2-state diagonal-covariance ``GaussianHMM`` is fitted on it
    and the decoded states are passed through ``filter_out_short_motions``
    together with ``min_secs_for_train_to_pass`` and ``fps``.  The learned
    parameters are printed, the timeline is plotted with ``plot_timeline``
    and the frames at the non-zero positions of the filtered result are
    copied with ``utils.copy_image_subset``.

    Returns the value produced by ``filter_out_short_motions``.
    """
    from sklearn.hmm import GaussianHMM

    state_count = 2
    # Column-stack the inputs, then transpose to get the matrix fed to the HMM.
    observations = np.column_stack(change_vals).T

    model = GaussianHMM(state_count, covariance_type="diag", n_iter=1000)
    model.fit([observations])
    hidden_states = model.predict(observations)

    # Report the learned parameters.
    print("Transition matrix")
    print(model.transmat_)
    print()

    print("means and vars of each hidden state")
    for state in range(state_count):
        print("%dth hidden state" % state)
        print("mean = ", model.means_[state])
        print("var = ", np.diag(model.covars_[state]))
        print()

    # Make state 1 the one with the larger first-feature mean, flipping the
    # labels when the fit came out the other way round (the original note
    # here: assume most frames have no train).
    state0_mean, state1_mean = model.means_[0][0], model.means_[1][0]
    if state0_mean > state1_mean:
        hidden_states = 1 - hidden_states

    train_spotted = filter_out_short_motions(hidden_states, min_secs_for_train_to_pass, fps)
    plot_timeline(img_times, change_vals, hidden_states, train_spotted)

    spotted_positions = np.nonzero(train_spotted)[0]
    utils.copy_image_subset(config.experiment_data_frames, config.experiment_output_frames_hmm, spotted_positions)

    return train_spotted
Example #3
0
def predictWithHMM(index, window=252):
    """Predict the direction at ``index`` from the HMM state it decodes to.

    Works on the free variables ``X``, ``actual_y`` and ``y`` defined
    outside this function: the ``window`` rows before ``index`` form the
    training set and row ``index`` is the single test sample.  When
    ``perform_pca`` is truthy both are projected with a ``PCA`` fitted on
    the training rows.  A ``GaussianHMM(n_components, "diag", n_iter=1000)``
    is fitted on the training rows; the training targets whose decoded
    state equals the test sample's decoded state are averaged.

    Returns
    -------
    tuple
        ``(prediction, testing_y, prob)`` where ``prediction`` is ``1``
        when that average is positive and ``0`` otherwise, ``testing_y``
        is ``y[index]`` and ``prob`` is always ``0``.
    """
    history_rows = range(index - window, index)
    training_X = X[history_rows, :]
    training_y = actual_y[history_rows]
    testing_X = X[index, :].reshape(1, training_X.shape[1])
    testing_y = y[index]

    if perform_pca:
        reducer = PCA(n_components=pca_components)
        reducer.fit(training_X)
        training_X = reducer.transform(training_X)
        testing_X = reducer.transform(testing_X)

    model = GaussianHMM(n_components, "diag", n_iter=1000)
    model.fit([training_X])

    train_states = model.predict(training_X)
    test_state = model.predict(testing_X)

    # Average (np.mean) the targets of the training rows that share the
    # test sample's decoded state.
    same_state = train_states == test_state
    state_mean = np.mean(training_y[same_state])

    return int(state_mean > 0), testing_y, 0
Example #4
0
def create_hmm_by_label(label):
    """Fit a 3-state diagonal-covariance Gaussian HMM for one label.

    The value returned by ``get_sequences_by_label(label)`` is wrapped in
    a one-element list and handed to ``fit``.  Returns the model object
    on which ``fit`` was called.
    """
    training_seqs = get_sequences_by_label(label)

    model = GaussianHMM(3, covariance_type="diag", n_iter=1000)
    model.fit([training_seqs])
    return model
Example #5
0
    def __init__(self, n_states, n_features):
        """Create the wrapped scikit-learn ``GaussianHMM``.

        ``n_states`` is forwarded as the first constructor argument along
        with ``params='stmc'``.  ``n_features`` is accepted but not used
        here.  The sequence and parameter attributes all start as ``None``.
        """
        from sklearn.hmm import GaussianHMM

        self.impl = GaussianHMM(n_states, params='stmc')
        self._sequences = None
        self.means_ = self.vars_ = self.transmat_ = self.startprob_ = None
	def fit_HMMs(self, apans=None, dpans=None):
		"""Fit one GaussianHMM per available vital for the dead and alive groups.

		When ``apans`` is given, ``self.get_observations(apans, dpans)`` is
		called first.  The model dictionaries and the risk-vector lists are
		then reset, and for every entry of ``self.vitals_available`` a
		``GaussianHMM`` is fitted to the matching dead and alive
		observations.  The value returned by ``fit`` is what gets stored.
		"""
		if apans is not None:
			self.get_observations(apans, dpans)

		self.HMMs_dead, self.HMMs_alive = {}, {}
		self.risk_vectors_dead, self.risk_vectors_alive = [], []

		print("Training HMM's")
		for vital in self.vitals_available:
			dead_model = GaussianHMM(self.nstates_dead[vital], self.covariance_type)
			self.HMMs_dead[vital] = dead_model.fit(self.observations_dead[vital])
			alive_model = GaussianHMM(self.nstates_alive[vital], self.covariance_type)
			self.HMMs_alive[vital] = alive_model.fit(self.observations_alive[vital])
Example #7
0
class HMMGestureMonitor(GestureMonitor):
	"""Gesture monitor that scores motion windows with a 5-component GaussianHMM."""

	def __init__(self, _train_ms_list, _gesture_name, FeatureExtractor=AVFeatureExtractor):
		"""Pass every argument straight through to ``GestureMonitor.__init__``."""
		GestureMonitor.__init__(self, _train_ms_list, _gesture_name, FeatureExtractor)

	def train(self, motion_sequences):
		"""Fit the HMM, then build the score threshold and window timespans.

		Each motion sequence is turned into a dataframe and run through
		``self.feature_extractor.extract``; examples whose summed features
		are NaN are dropped before fitting.
		"""
		frames = [sequence.get_dataframe() for sequence in motion_sequences]
		extracted = [self.feature_extractor.extract(frame) for frame in frames]
		examples = [feats for feats in extracted if not np.isnan(np.sum(feats))]

		self.hmm = GaussianHMM(n_components=5).fit(examples)
		self.score_threshold = GMScoreThreshold(self.hmm.score, examples)
		self.window_timespans = self.calculate_window_timespans(motion_sequences)

	def classify_window_df(self, window_df):
		"""Return ``self.score_threshold.classify`` of the features of ``window_df``."""
		return self.score_threshold.classify(self.feature_extractor.extract(window_df))

	def get_current_reaction(self):
		"""Return the highest HMM score over the current windows, or ``None``.

		``None`` is returned when ``self.get_window_dfs()`` yields nothing.
		"""
		scores = [
			self.hmm.score(self.feature_extractor.extract(window_df))
			for window_df in self.get_window_dfs()
		]
		return np.max(scores) if scores else None
Example #8
0
    def run(self, protos):
        """Fit one Gaussian HMM per prototype and remember the results.

        Every entry of ``protos`` unpacks to ``(nstate, label, seq)``.  The
        sequence goes through ``self._training.run`` and then
        ``self._feature.run``; column 1 of the first feature output is
        stacked with the second output and transposed to form the
        observations.  The model is created with the start and transition
        parameters returned by ``self.init_left_right_model(nstate)``.

        Returns the list of ``{'id': label, 'model': clf}`` dicts, which is
        also stored in ``self._models``.
        """
        trained = []
        for nstate, label, seq in protos:
            prepared = self._training.run(seq)
            first, second = self._feature.run(prepared, True)
            observations = np.vstack((first[:, 1], second)).T

            startprob, transmat = self.init_left_right_model(nstate)
            classifier = GaussianHMM(n_components=nstate,
                                     covariance_type=self._covar,
                                     transmat=transmat,
                                     startprob=startprob)
            classifier.fit(np.array([observations]))
            trained.append(dict(id=label, model=classifier))

        self._models = trained
        return trained
Example #9
0
def create_hmm_by_labels(labels, dbs):
    """Fit one 3-state full-covariance Gaussian HMM on several labels.

    The sequences of every label in ``labels`` (fetched with
    ``get_sequences_by_label_multi_dbs(label, dbs)``) are pooled into one
    flat list of observation sequences, and a single model is fitted on
    that list.

    The previous implementation gathered one entry per label and then
    kept only element ``[0]`` of the result, so every label after the
    first was silently left out of training.

    Parameters
    ----------
    labels : iterable
        Labels whose sequences should be used for training.
    dbs :
        Passed through unchanged to ``get_sequences_by_label_multi_dbs``.

    Returns
    -------
    The model object on which ``fit`` was called.
    """
    pooled_seqs = []
    for label in labels:
        # Each sequence is handed over as an array, as it was when the old
        # code indexed into its ``np.array(...)`` wrapper.
        pooled_seqs.extend(
            np.asarray(seq) for seq in get_sequences_by_label_multi_dbs(label, dbs))

    hmm = GaussianHMM(3, covariance_type="full", n_iter=1000)
    hmm.fit(pooled_seqs)
    return hmm
def get_trained_model(rootpath, condition, n_states, n_iterations, feature, cov_type):
    fname_mean = condition + '-cond-' + feature + '-feat-' + str(n_states) + '-states-' + str(n_iterations) + '-iter-mean.txt'
    fname_cov = condition + '-cond-' + feature + '-feat-' + str(n_states) + '-states-' + str(n_iterations) + '-iter-cov.txt'
    fname_tmat = condition + '-cond-' + feature + '-feat-' + str(n_states) + '-states-' + str(n_iterations) + '-iter-transtion.txt'

    constructed_path_mean = rootpath + condition + '/' + fname_mean
    mean = np.loadtxt(constructed_path_mean)
    iter_list = range(n_states)
    iter_list.reverse()
    deleted_means = []
    for i in iter_list:
        if mean[i][mean[i] > 0.01].shape[0] == 0:
            print 'skipping deleting ith mean:', i, mean[i]
            #mean = np.delete(mean, i, 0)
            #deleted_means.append(i)

    constructed_path_cov = rootpath + condition + '/' + fname_cov
    if cov_type == 'full':
        cov = load_full(constructed_path_cov, n_states, 10)
    else:
        cov = np.loadtxt(constructed_path_cov)
    constructed_path_tmat = rootpath + condition + '/' + fname_tmat
    tmat = np.loadtxt(constructed_path_tmat)
    #fixing tmat if any of the means and covs were deleted
    deleted_means.sort()
    deleted_means.reverse()
    for di in deleted_means:
        tmat = np.delete(tmat, di, 1)
        tmat = np.delete(tmat, di, 0)

    smat = np.zeros(tmat.shape[0])
    smat[0] = 1.0
    sum_fix = np.sum(tmat, axis=1)
    sum_fix = 1.0 / sum_fix
    #print tmat
    for i in range(tmat.shape[0]):
        tmat[i] = tmat[i] * sum_fix[i]
        #print 'corrected\n', tmat
    if n_states != tmat.shape[0]:
        print 'removed some states, n_states now corrected to: ', tmat.shape[0], 'was originaly', n_states
        n_states = tmat.shape[0]
    model = GaussianHMM(n_components=n_states, covariance_type=cov_type, startprob=smat, transmat=tmat, n_iter=0, init_params='mc')
    model.means_ = mean
    model.covars_ = cov
    return model
Example #11
0
    def __init__(self, n_states, n_features):
        """Create the wrapped scikit-learn ``GaussianHMM``.

        ``n_states`` is forwarded as the first constructor argument along
        with ``params='stmc'``.  ``n_features`` is accepted but not used
        here.  The sequence and parameter attributes all start as ``None``.
        """
        from sklearn.hmm import GaussianHMM

        self.impl = GaussianHMM(n_states, params='stmc')
        self._sequences = None
        self.means_ = self.vars_ = self.transmat_ = self.startprob_ = None
Example #12
0
    def run(self, protos):
        """Fit one Gaussian HMM per prototype and remember the results.

        Every entry of ``protos`` unpacks to ``(nstate, label, seq)``.  The
        sequence goes through ``self._training.run`` and then
        ``self._feature.run``; column 1 of the first feature output is
        stacked with the second output and transposed to form the
        observations.  The model is created with the start and transition
        parameters returned by ``self.init_left_right_model(nstate)``.

        Returns the list of ``{'id': label, 'model': clf}`` dicts, which is
        also stored in ``self._models``.
        """
        trained = []
        for nstate, label, seq in protos:
            prepared = self._training.run(seq)
            first, second = self._feature.run(prepared, True)
            observations = np.vstack((first[:, 1], second)).T

            startprob, transmat = self.init_left_right_model(nstate)
            classifier = GaussianHMM(n_components=nstate,
                                     covariance_type=self._covar,
                                     transmat=transmat,
                                     startprob=startprob)
            classifier.fit(np.array([observations]))
            trained.append(dict(id=label, model=classifier))

        self._models = trained
        return trained
Example #13
0
	def train(self, motion_sequences):
		"""Fit the HMM, then build the score threshold and window timespans.

		Each motion sequence is turned into a dataframe and run through
		``self.feature_extractor.extract``; examples whose summed features
		are NaN are dropped before a 5-component ``GaussianHMM`` is fitted.
		"""
		frames = [sequence.get_dataframe() for sequence in motion_sequences]
		extracted = [self.feature_extractor.extract(frame) for frame in frames]
		examples = [feats for feats in extracted if not np.isnan(np.sum(feats))]

		self.hmm = GaussianHMM(n_components=5).fit(examples)
		self.score_threshold = GMScoreThreshold(self.hmm.score, examples)
		self.window_timespans = self.calculate_window_timespans(motion_sequences)
Example #14
0
def HMM(data, sid, means_prior=None):
    """Fit a diagonal Gaussian HMM on one ``sid``'s variation and volume.

    ``data.variation[sid]`` and ``data.volume[sid]`` are column-stacked
    into the observation matrix.  ``means_prior`` is forwarded to the
    model with ``means_weight=0.5`` (the original note describes it as
    the means of the previous model).

    Returns the fitted model, or ``None`` when there are fewer
    observations than ``HIDDEN_STATES``.
    """
    hmm_model = GaussianHMM(HIDDEN_STATES, covariance_type="diag", n_iter=10,
                            means_prior=means_prior, means_weight=0.5)

    variation = data.variation[sid].values
    traded_volume = data.volume[sid].values
    observations = np.column_stack([variation, traded_volume])

    # Too little data: skip fitting.
    if len(variation) < HIDDEN_STATES:
        return None

    hmm_model.fit([observations])
    return hmm_model
	def get_hmms (self):

		for gesture_type in self.gesture_types:

			print_status ("Get_Hmms", "Fitting for gesture_type: " + gesture_type)
			### Step 1: fill hmm_examples appropriately ###
			hmm_examples = []
			for gesture in self.gestures[gesture_type]:
				hmm_rep = gesture.get_hmm_rep ()
				hmm_examples.append (hmm_rep)

			### Step 2: fit parameters for the hmm ###
			hmm = GaussianHMM (self.num_hmm_states)
			hmm.fit (hmm_examples)

			### Step 3: store the hmm in self.hmms ###
			self.hmms[gesture_type] = hmm

			print_inner_status (gesture_type, "predicted the following sequences: (score: sequence)")
			for example in hmm_examples:
				print "		", hmm.score (example), ": ", hmm.predict (example)
Example #16
0
def predictWithHMM(index, window=252):
    """Predict the direction at ``index`` from the HMM state it decodes to.

    Works on the free variables ``X``, ``actual_y`` and ``y`` defined
    outside this function: the ``window`` rows before ``index`` form the
    training set and row ``index`` is the single test sample.  When
    ``perform_pca`` is truthy both are projected with a ``PCA`` fitted on
    the training rows.  A ``GaussianHMM(n_components, "diag", n_iter=1000)``
    is fitted on the training rows; the training targets whose decoded
    state equals the test sample's decoded state are averaged.

    Returns
    -------
    tuple
        ``(prediction, testing_y, prob)`` where ``prediction`` is ``1``
        when that average is positive and ``0`` otherwise, ``testing_y``
        is ``y[index]`` and ``prob`` is always ``0``.
    """
    history_rows = range(index - window, index)
    training_X = X[history_rows, :]
    training_y = actual_y[history_rows]
    testing_X = X[index, :].reshape(1, training_X.shape[1])
    testing_y = y[index]

    if perform_pca:
        reducer = PCA(n_components=pca_components)
        reducer.fit(training_X)
        training_X = reducer.transform(training_X)
        testing_X = reducer.transform(testing_X)

    model = GaussianHMM(n_components, "diag", n_iter=1000)
    model.fit([training_X])

    train_states = model.predict(training_X)
    test_state = model.predict(testing_X)

    # Average (np.mean) the targets of the training rows that share the
    # test sample's decoded state.
    same_state = train_states == test_state
    state_mean = np.mean(training_y[same_state])

    return int(state_mean > 0), testing_y, 0
Example #17
0
    def get_hmms(self):

        for gesture_type in self.gesture_types:

            print_status("Get_Hmms",
                         "Fitting for gesture_type: " + gesture_type)

            ### Step 1: fill hmm_examples appropriately ###
            hmm_examples = []
            for gesture in self.gestures[gesture_type]:
                hmm_rep = gesture.get_hmm_rep()
                hmm_examples.append(hmm_rep)

            ### Step 2: fit parameters for the hmm ###
            hmm = GaussianHMM(self.num_hmm_states)
            hmm.fit(hmm_examples)

            ### Step 3: store the hmm in self.hmms ###
            self.hmms[gesture_type] = hmm

            print_inner_status(
                gesture_type,
                "predicted the following sequences: (score: sequence)")
            for example in hmm_examples:
                print "		", hmm.score(example), ": ", hmm.predict(example)
Example #18
0
    def run(self, data):
        """Fit a diagonal Gaussian HMM on close-value changes and volume.

        Reads the ``'price'``, ``'close_v'`` and ``'volume'`` fields of
        ``data[self.sids[0]]``, stores the derived arrays on ``self`` and
        finishes by decoding the hidden-state sequence of the training
        matrix into ``self.hidden_states``.

        NOTE(review): ``self.dates`` is filled from the ``'price'`` field --
        confirm that this is intended.
        """
        sid = self.sids[0]
        self.dates = data[sid]['price'].values
        closes = data[sid]['close_v'].values
        self.close_v = closes
        # Differencing leaves one value fewer than there are closes, so the
        # volume series drops its first entry to keep the lengths equal.
        self.volume = data[sid]['volume'].values[1:]
        self.diff = closes[1:] - closes[:-1]

        # Training matrix: one column of differences, one of volume.
        self.X = np.column_stack([self.diff, self.volume])

        self.model = GaussianHMM(self.n_components,
                                 covariance_type="diag",
                                 n_iter=self.n_iter)
        self.model.fit([self.X], n_iter=self.n_iter)

        self.hidden_states = self.model.predict(self.X)
def gaussian_hmm_model(stock_market_quote, n_components=5):
    close_v = np.asarray(stock_market_quote.get_closing_price())
    volume = np.asanyarray(stock_market_quote.get_volume())
    volume = volume[:-1]
    diff = close_v[1:] - close_v[:-1]
    close_v = close_v[1:]
    X = np.column_stack([diff, volume])
    model = GaussianHMM(n_components, covariance_type="diag")
    model.fit([X])
    hidden_states = model.predict(X)
    
    print "Transition matrix"
    print model.transmat_
    print ""
    
    print "means and vars of each hidden state"
    for i in xrange(n_components):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""
    
    '''Visualization of Closing Price with respect to Volume, clustered by
    hidden states of data
    '''
    fig = mlp.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_components):
        idx = (hidden_states == i)
        ax.plot(volume[idx], close_v[idx], 'o', label="%dth hidden state" % i)
    ax.legend()
    ax.set_xlabel('Volume of Stock', fontsize=20)
    ax.set_ylabel('Closing Price of Stock', fontsize=20)
    ax.set_title("""Quote's Volume and closing volume change 
                    in different hidden states""")
    ax.grid(True)
    mlp.show()
Example #20
0
def gaussian_hmm_model(stock_market_quote, n_components=5):
    close_v = np.asarray(stock_market_quote.get_closing_price())
    volume = np.asanyarray(stock_market_quote.get_volume())
    volume = volume[:-1]
    diff = close_v[1:] - close_v[:-1]
    close_v = close_v[1:]
    X = np.column_stack([diff, volume])
    model = GaussianHMM(n_components, covariance_type="diag")
    model.fit([X])
    hidden_states = model.predict(X)

    print "Transition matrix"
    print model.transmat_
    print ""

    print "means and vars of each hidden state"
    for i in xrange(n_components):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""
    '''Visualization of Closing Price with respect to Volume, clustered by
    hidden states of data
    '''
    fig = mlp.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_components):
        idx = (hidden_states == i)
        ax.plot(volume[idx], close_v[idx], 'o', label="%dth hidden state" % i)
    ax.legend()
    ax.set_xlabel('Volume of Stock', fontsize=20)
    ax.set_ylabel('Closing Price of Stock', fontsize=20)
    ax.set_title("""Quote's Volume and closing volume change 
                    in different hidden states""")
    ax.grid(True)
    mlp.show()
Example #21
0
def hmm(samples):
	"""Fit a 3-component Gaussian HMM to ``samples`` and tabulate its lattices.

	Rows with missing values are dropped first.  One-dimensional input is
	reshaped into a single-feature column before fitting.  After fitting,
	the per-frame log-likelihoods and the forward lattice are computed
	with the model's private helpers and laid out as data frames indexed
	like the cleaned input.

	The previous version stopped in ``pdb.set_trace()`` on every call and
	built a ``predict`` frame that was never used; both are removed.

	Parameters
	----------
	samples : pandas object
		Must provide ``dropna()``, ``index`` and ``values``.

	Returns
	-------
	tuple
		``(model, table)`` where ``table`` joins the ``frame_<k>`` columns
		(frame log-likelihoods) with the ``forward_<k>`` columns (forward
		lattice), one pair per component.
	"""
	model = GaussianHMM(n_components=3)
	samples = samples.dropna()
	idx = samples.index
	if samples.values.ndim < 2:
		# Turn the 1-D values into an (n, 1) column.
		values = samples.values
		samples = values.reshape(values.shape[0], 1)

	model.fit([samples])
	framelogprob = model._compute_log_likelihood(samples)
	# Only the lattice is needed; the sequence log-probability is discarded.
	_, fwdlattice = model._do_forward_pass(framelogprob)

	n_states = model.means_.shape[0]
	frame = pd.DataFrame(
		framelogprob, index=idx,
		columns=["frame_" + str(k) for k in range(n_states)])
	forward = pd.DataFrame(
		fwdlattice, index=idx,
		columns=["forward_" + str(k) for k in range(n_states)])
	return model, frame.join(forward)
Example #22
0
def main():
    """
    Train a full-covariance Gaussian HMM and pickle it.

    First ARG: list of training files
    Second ARG: save name for model

    The transition matrix comes from ``calc_transmat`` and is passed to
    the model, whose ``params`` / ``init_params`` are both ``'mcs'``.
    After fitting, the model decodes the features of ``audio.arff`` and
    the number of positions whose decoded label equals the stored label
    is printed.  Finally the model is pickled and the transition matrix
    is shown as an image.
    """
    list_path = sys.argv[1]
    outname = sys.argv[2]

    # Close the list file deterministically and strip only the newline:
    # slicing off the last character also ate a real character whenever
    # the final line had no trailing newline.
    with open(list_path, 'r') as list_file:
        file_list = [line.rstrip('\n') for line in list_file]

    _, transitions, _ = calc_transmat(file_list)
    hmm = GaussianHMM(
        transitions.shape[0],
        "full",
        n_iter=500,
        transmat=transitions,
        init_params='mcs',
        params='mcs',
    )
    # The training features are loaded once; the earlier code loaded the
    # same files twice and threw the first result away.
    feat, _ = load_feats_labels(file_list)
    print('Fitting')

    start = timeit.default_timer()
    hmm.fit([np.transpose(feat)])
    stop = timeit.default_timer()
    print('Training Time: ' + str(stop - start))

    features, labels = load_feats_labels(['audio.arff'])
    _, seq = hmm.decode(np.transpose(features))
    # Count the positions where the decoded state maps to the stored label.
    matches = sum(1 for expected, state in zip(labels, seq)
                  if expected == int2label(state))
    print(matches)

    # Write the pickle through a context manager so the file is flushed
    # and closed.
    with open(outname, "wb") as model_file:
        pickle.dump(hmm, model_file)
    plt.imshow(transitions, interpolation='nearest')
    plt.show()
Example #23
0
def main():
    """
    Train a full-covariance Gaussian HMM and pickle it.

    First ARG: list of training files
    Second ARG: save name for model

    The transition matrix comes from ``calc_transmat`` and is passed to
    the model, whose ``params`` / ``init_params`` are both ``'mcs'``.
    After fitting, the model decodes the features of ``audio.arff`` and
    the number of positions whose decoded label equals the stored label
    is printed.  Finally the model is pickled and the transition matrix
    is shown as an image.
    """
    list_path = sys.argv[1]
    outname = sys.argv[2]

    # Close the list file deterministically and strip only the newline:
    # slicing off the last character also ate a real character whenever
    # the final line had no trailing newline.
    with open(list_path, 'r') as list_file:
        file_list = [line.rstrip('\n') for line in list_file]

    _, transitions, _ = calc_transmat(file_list)
    hmm = GaussianHMM(
        transitions.shape[0],
        "full",
        n_iter=500,
        transmat=transitions,
        init_params='mcs',
        params='mcs',
    )
    # The training features are loaded once; the earlier code loaded the
    # same files twice and threw the first result away.
    feat, _ = load_feats_labels(file_list)
    print('Fitting')

    start = timeit.default_timer()
    hmm.fit([np.transpose(feat)])
    stop = timeit.default_timer()
    print('Training Time: ' + str(stop - start))

    features, labels = load_feats_labels(['audio.arff'])
    _, seq = hmm.decode(np.transpose(features))
    # Count the positions where the decoded state maps to the stored label.
    matches = sum(1 for expected, state in zip(labels, seq)
                  if expected == int2label(state))
    print(matches)

    # Write the pickle through a context manager so the file is flushed
    # and closed.
    with open(outname, "wb") as model_file:
        pickle.dump(hmm, model_file)
    plt.imshow(transitions, interpolation='nearest')
    plt.show()
Example #24
0
    def run(self, data):
        """Fit a diagonal Gaussian HMM on close-value changes and volume.

        Reads the ``'price'``, ``'close_v'`` and ``'volume'`` fields of
        ``data[self.sids[0]]``, stores the derived arrays on ``self`` and
        finishes by decoding the hidden-state sequence of the training
        matrix into ``self.hidden_states``.

        NOTE(review): ``self.dates`` is filled from the ``'price'`` field --
        confirm that this is intended.
        """
        sid = self.sids[0]
        self.dates = data[sid]['price'].values
        closes = data[sid]['close_v'].values
        self.close_v = closes
        # Differencing leaves one value fewer than there are closes, so the
        # volume series drops its first entry to keep the lengths equal.
        self.volume = data[sid]['volume'].values[1:]
        self.diff = closes[1:] - closes[:-1]

        # Training matrix: one column of differences, one of volume.
        self.X = np.column_stack([self.diff, self.volume])

        self.model = GaussianHMM(self.n_components, covariance_type="diag", n_iter=self.n_iter)
        self.model.fit([self.X], n_iter=self.n_iter)

        self.hidden_states = self.model.predict(self.X)
Example #25
0
class _SklearnGaussianHMMCPUImpl(object):
    """Wrapper that runs HMM computations through scikit-learn's ``GaussianHMM``.

    ``_sequences``, ``means_``, ``vars_``, ``transmat_`` and ``startprob_``
    start as ``None``; ``do_estep`` and ``do_viterbi`` read them.
    """

    def __init__(self, n_states, n_features):
        """Create the wrapped model; ``n_features`` is accepted but unused."""
        from sklearn.hmm import GaussianHMM

        self.impl = GaussianHMM(n_states, params='stmc')
        self._sequences = None
        self.means_ = self.vars_ = self.transmat_ = self.startprob_ = None

    def do_estep(self):
        """Accumulate sufficient statistics over all stored sequences.

        The current parameters are copied into the wrapped model as
        doubles, then every sequence is run through the forward and
        backward passes and folded into one statistics object.

        Returns ``(total_logprob, stats)``, the summed forward-pass
        log-probability and the accumulated statistics.
        """
        from sklearn.utils.extmath import logsumexp

        impl = self.impl
        impl.means_ = self.means_.astype(np.double)
        impl.covars_ = self.vars_.astype(np.double)
        impl.transmat_ = self.transmat_.astype(np.double)
        impl.startprob_ = self.startprob_.astype(np.double)

        stats = impl._initialize_sufficient_statistics()
        total_logprob = 0
        for raw_seq in self._sequences:
            seq = raw_seq.astype(np.double)
            framelogprob = impl._compute_log_likelihood(seq)
            seq_logprob, fwdlattice = impl._do_forward_pass(framelogprob)
            bwdlattice = impl._do_backward_pass(framelogprob)
            gamma = fwdlattice + bwdlattice
            # Subtract each row's log-sum-exp, then exponentiate.
            posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
            total_logprob += seq_logprob
            impl._accumulate_sufficient_statistics(
                stats, seq, framelogprob, posteriors, fwdlattice,
                bwdlattice, impl.params)

        return total_logprob, stats

    def do_viterbi(self):
        """Viterbi-decode every stored sequence.

        Returns ``(total_logprob, state_sequences)``: the sum of the
        per-sequence log-probabilities and the decoded paths in order.
        """
        decoded = [self.impl._decode_viterbi(obs) for obs in self._sequences]
        total_logprob = sum(lpr for lpr, _ in decoded)
        state_sequences = [path for _, path in decoded]
        return total_logprob, state_sequences
Example #26
0
class _SklearnGaussianHMMCPUImpl(object):
    """Wrapper that runs HMM computations through scikit-learn's ``GaussianHMM``.

    ``_sequences``, ``means_``, ``vars_``, ``transmat_`` and ``startprob_``
    start as ``None``; ``do_estep`` and ``do_viterbi`` read them.
    """

    def __init__(self, n_states, n_features):
        """Create the wrapped model; ``n_features`` is accepted but unused."""
        from sklearn.hmm import GaussianHMM

        self.impl = GaussianHMM(n_states, params='stmc')
        self._sequences = None
        self.means_ = self.vars_ = self.transmat_ = self.startprob_ = None

    def do_estep(self):
        """Accumulate sufficient statistics over all stored sequences.

        The current parameters are copied into the wrapped model as
        doubles, then every sequence is run through the forward and
        backward passes and folded into one statistics object.

        Returns ``(total_logprob, stats)``, the summed forward-pass
        log-probability and the accumulated statistics.
        """
        from sklearn.utils.extmath import logsumexp

        impl = self.impl
        impl.means_ = self.means_.astype(np.double)
        impl.covars_ = self.vars_.astype(np.double)
        impl.transmat_ = self.transmat_.astype(np.double)
        impl.startprob_ = self.startprob_.astype(np.double)

        stats = impl._initialize_sufficient_statistics()
        total_logprob = 0
        for raw_seq in self._sequences:
            seq = raw_seq.astype(np.double)
            framelogprob = impl._compute_log_likelihood(seq)
            seq_logprob, fwdlattice = impl._do_forward_pass(framelogprob)
            bwdlattice = impl._do_backward_pass(framelogprob)
            gamma = fwdlattice + bwdlattice
            # Subtract each row's log-sum-exp, then exponentiate.
            posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
            total_logprob += seq_logprob
            impl._accumulate_sufficient_statistics(
                stats, seq, framelogprob, posteriors, fwdlattice,
                bwdlattice, impl.params)

        return total_logprob, stats

    def do_viterbi(self):
        """Viterbi-decode every stored sequence.

        Returns ``(total_logprob, state_sequences)``: the sum of the
        per-sequence log-probabilities and the decoded paths in order.
        """
        decoded = [self.impl._decode_viterbi(obs) for obs in self._sequences]
        total_logprob = sum(lpr for lpr, _ in decoded)
        state_sequences = [path for _, path in decoded]
        return total_logprob, state_sequences
Example #27
0
    def predict(self, obs):
        """Find most likely state sequence corresponding to `obs`.

        A temporary full-covariance ``GaussianHMM`` is populated with this
        object's transition matrix, means and covariances.  Its start
        probabilities are the normalised left eigenvector of
        ``self.transmat_`` that belongs to the eigenvalue with the largest
        real part (eigenvalue 1 for a row-stochastic matrix).

        Parameters
        ----------
        obs : np.ndarray, shape=(n_samples, n_features)
            Sequence of n_features-dimensional data points. Each row
            corresponds to a single point in the sequence.

        Returns
        -------
        hidden_states : np.ndarray, shape=(n_samples,)
            Index of the most likely states for each observation
        """
        eigvals, left_vecs = scipy.linalg.eig(self.transmat_, left=True, right=False)
        # eig() returns its eigenvalues in no particular order, so select
        # the dominant one explicitly instead of assuming it is column 0.
        # The real part is taken because eig() hands back complex arrays.
        dominant = np.argmax(np.real(eigvals))
        stationary = np.real(left_vecs[:, dominant])
        startprob = stationary / np.sum(stationary)

        model = GaussianHMM(n_components=self.n_states, covariance_type='full')
        model.startprob_ = startprob
        model.transmat_ = self.transmat_
        model.means_ = self.means_
        model.covars_ = self.covars_
        return model.predict(obs)
Example #28
0
    def predict(self, obs):
        """Find most likely state sequence corresponding to `obs`.

        A temporary full-covariance ``GaussianHMM`` is populated with this
        object's transition matrix, means and covariances.  Its start
        probabilities are the normalised left eigenvector of
        ``self.transmat_`` that belongs to the eigenvalue with the largest
        real part (eigenvalue 1 for a row-stochastic matrix).

        Parameters
        ----------
        obs : np.ndarray, shape=(n_samples, n_features)
            Sequence of n_features-dimensional data points. Each row
            corresponds to a single point in the sequence.

        Returns
        -------
        hidden_states : np.ndarray, shape=(n_samples,)
            Index of the most likely states for each observation
        """
        eigvals, left_vecs = scipy.linalg.eig(self.transmat_, left=True, right=False)
        # eig() returns its eigenvalues in no particular order, so select
        # the dominant one explicitly instead of assuming it is column 0.
        # The real part is taken because eig() hands back complex arrays.
        dominant = np.argmax(np.real(eigvals))
        stationary = np.real(left_vecs[:, dominant])
        startprob = stationary / np.sum(stationary)

        model = GaussianHMM(n_components=self.n_states, covariance_type='full')
        model.startprob_ = startprob
        model.transmat_ = self.transmat_
        model.means_ = self.means_
        model.covars_ = self.covars_
        return model.predict(obs)
Example #29
0
 def build_model(self):
     """Fit a diagonal Gaussian HMM on ``self.X_hmm`` and decode it.

     The fitted model is stored in ``self.model`` and the decoded state
     sequence of ``self.X_hmm`` in ``self.hidden_states``.
     """
     self.model = GaussianHMM(self.n_states, covariance_type='diag', n_iter=1000)
     observations = self.X_hmm
     self.model.fit([observations])
     self.hidden_states = self.model.predict(observations)
Example #30
0
class HMM(object):
    '''
    Class for creating and manipulating an HMM model built from a steam
    time series, a weather time series and the hour of day.

    Keyword arguments accepted by the constructor:

    steam_obj   -- object exposing a ``ts`` series (default: ``Steam()``)
    weather_obj -- object exposing a ``ts`` series (default: ``Weather()``)
    n_states    -- number of hidden states.  When omitted, the AIC/BIC
                   elbow plot is shown instead and ``n_states`` is left
                   unset; assign it before calling ``build_model``.
    '''

    # Markers used by plot_model: the marker changes every
    # _STATES_PER_MARKER states and the tuple is reused cyclically.
    _MARKERS = ('o', 'x', '+', '*')
    _STATES_PER_MARKER = 7

    def __init__(self,**kwargs):
        self.steam_obj = kwargs['steam_obj'] if 'steam_obj' in kwargs else Steam()
        self.weather_obj = kwargs['weather_obj'] if 'weather_obj' in kwargs else Weather()
        # The column order produced here matters: build_forecast_model
        # slices the first feature (steam) off the fitted model.
        self.df_hmm, self.X_hmm = self.gen_meta_data(self.steam_obj, self.weather_obj)
        if 'n_states' in kwargs:
            self.n_states = kwargs['n_states']
        else:
            self.plot_elbow(3, 15)

    def __len__(self):
        '''Number of rows in the observation matrix.'''
        return len(self.X_hmm)

    def build_model(self):
        '''Fit a diagonal Gaussian HMM on ``X_hmm`` and decode its states.'''
        X_hmm = self.X_hmm
        self.model = GaussianHMM(self.n_states, covariance_type='diag', n_iter=1000)
        self.model.fit([X_hmm])
        self.hidden_states = self.model.predict(X_hmm)

    def build_forecast_model(self):
        '''
        Derive a copy of the fitted model without its first feature.

        The copy has one feature fewer, and its means and covariances are
        the original ones with column 0 removed.  It is stored in
        ``self.model_forecast``.
        '''
        model = self.model
        model_forecast = copy.deepcopy(model)
        model_forecast.n_features = model.n_features - 1
        model_forecast._means_ = model.means_[:, 1:]
        model_forecast._covars_ = model._covars_[:, 1:]
        self.model_forecast = model_forecast

    def gen_meta_data(self,steam_obj=None,weather_obj=None):
        '''
        Build the feature frame and observation matrix.

        With a steam object the columns are steam, weather and hour of
        day, indexed like the steam series; without one they are weather
        and hour of day, indexed like the weather series.  Hour of day is
        ``hour + minute / 60``.

        Returns ``(df_hmm, X_hmm)``: the frame and its values taken in the
        column order given above.
        '''
        # ``is not None`` instead of ``!= None``: an identity test cannot
        # be hijacked by an overloaded comparison.
        if steam_obj is not None:
            index = steam_obj.ts.index
            data = {'steam': steam_obj.ts, 'weather': weather_obj.ts}
            columns = ['steam', 'weather', 'hour_of_day']
        else:
            index = weather_obj.ts.index
            data = {'weather': weather_obj.ts}
            columns = ['weather', 'hour_of_day']
        data['hour_of_day'] = index.map(lambda stamp: stamp.hour + (stamp.minute / 60.0))
        df_hmm = pd.DataFrame(data, index=index)
        # Column selection plus ``.values`` replaces the removed
        # ``DataFrame.as_matrix(columns=...)`` and yields the same matrix.
        X_hmm = df_hmm[columns].values
        return df_hmm, X_hmm

    def plot_model(self,x_ax=None,y_ax=None):
        '''
        Scatter ``y_ax`` against ``x_ax`` with one series per hidden state.

        ``x_ax`` defaults to the steam timestamps and ``y_ax`` to the
        first column of ``X_hmm``.
        '''
        X_hmm = self.X_hmm
        steam_ts = self.steam_obj.ts
        # ``is None`` rather than ``== None``: comparing an array argument
        # with ``==`` is element-wise and cannot be used as a condition.
        if x_ax is None:
            x_ax = np.asarray([item.to_pydatetime() for item in steam_ts.index])
        if y_ax is None:
            y_ax = X_hmm[:, 0]
        hidden_states = self.hidden_states
        n_states = self.n_states
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for i in range(n_states):
            print(i)
            idx = (hidden_states == i)
            # States 0-6 use 'o', 7-13 'x', 14-20 '+', 21-27 '*'; beyond
            # that the markers repeat instead of the state being skipped.
            marker = self._MARKERS[(i // self._STATES_PER_MARKER) % len(self._MARKERS)]
            ax.plot(x_ax[idx], y_ax[idx], marker, label='%dth state' % i)
        ax.set_title('%d State HMM' % (n_states))
        ax.legend()
        ax.set_ylabel('Load (Mlb/Hr)')
        ax.set_xlabel('Time')
        ax.grid(True)
        plt.show()

    def plot_elbow(self,start,end):
        '''
        Fit GMM and plot elbow using AIC & BIC

        For every component count from ``start`` to ``end`` inclusive a
        GMM is fitted up to ten times, adding 1000 iterations per attempt,
        until it reports convergence.
        '''
        from sklearn.mixture import GMM
        obs = self.X_hmm
        component_counts = list(range(start, end + 1))
        aics = []
        bics = []
        for i in component_counts:
            n_iter = 1000
            for j in range(1, 11):
                g = GMM(n_components=i, n_iter=n_iter)
                g.fit(obs)
                print(i)
                converged = g.converged_
                if converged:
                    print('j:%d' % (j))
                    break
                n_iter += 1000
            # Report per component count; previously only the last count
            # was checked, after the loop had finished.
            if not converged:
                print('Not Converged!!')
            aics.append(g.aic(obs))
            bics.append(g.bic(obs))
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(component_counts, aics, label='AIC')
        ax.plot(component_counts, bics, label='BIC')
        ax.set_xlabel("No. of Clusters")
        ax.set_ylabel("Information Loss")
        ax.set_xticks(component_counts, minor=True)
        ax.legend()
        ax.grid(True, which='both')
        plt.show()
            start_cov = EPS * np.ones(len(pre_mean))
        else:
            start_cov = EPS * np.identity(len(pre_mean))
        means = np.vstack(([start_mean], means))
        covs = np.vstack(([start_cov], covs))
    return means, covs


if __name__ == "__main__":
    root = '../../lowres_features/'
    train_map = open(root + 'trainset.recs.updated.lowres.cleaned', 'r').readlines()
    train_map = [(line.split('/')[3], line.split('\t')[0], line.split('\t')[1]) for line in train_map]
    n_states = 10
    means, covs = get_states(n_states - 3, 'sirs', 'deviation', end=True)
    tmat, smat = get_tmat_smat_with_end(n_states - 3)
    model = GaussianHMM(n_components=n_states, covariance_type="diag", startprob=smat, transmat=tmat, n_iter=2, init_params='mc')
    for condition, file_path, incident_time in train_map:
        if condition == 'sirs':
        #condition, file_path, incident_time = train_map[110] # a random patient file
            print condition, file_path, incident_time

            t, last_index = overlapped_samples(file_path, incident_reported_time=int(incident_time), overlap=5, window=10, with_end=2)
            if t is None:
                print file_path, 'is bad'
            else:
                model.means_ = means
                model.covars_ = covs
                print 'shape intial', np.shape(covs)
                '''
                best_seq = model.decode(t)
                print 'intial,', best_seq
def test_2():
    """Yield checks that the CPU E-step matches the reference GaussianHMM.

    Random observations and diagonal-covariance parameters are fed to both
    ``GaussianHMMCPUImpl`` (as float32) and ``GaussianHMM``; the sufficient
    statistics of the two are then compared to 3 decimals, one yielded
    check per statistic.
    """
    n_features = 3
    length = 32

    for n_states in [4]:
        # Draw order is kept fixed so a seeded run produces the same data.
        observations = np.random.randn(length, n_features)
        state_means = np.random.randn(n_states, n_features)
        state_vars = np.random.rand(n_states, n_features)
        raw_trans = np.random.rand(n_states, n_states)
        trans = raw_trans / np.sum(raw_trans, axis=1)[:, None]  # rows sum to 1
        raw_start = np.random.rand(n_states)
        start = raw_start / np.sum(raw_start)

        chmm = GaussianHMMCPUImpl(n_states, n_features)
        chmm._sequences = [observations]

        pyhmm = GaussianHMM(n_components=n_states, init_params='',
                            params='', covariance_type='diag')

        # Implementation under test receives single-precision parameters.
        chmm.means_ = state_means.astype(np.float32)
        chmm.vars_ = state_vars.astype(np.float32)
        chmm.transmat_ = trans.astype(np.float32)
        chmm.startprob_ = start.astype(np.float32)
        clogprob, cstats = chmm.do_estep()

        # Reference model receives the same parameters unconverted.
        pyhmm.means_ = state_means
        pyhmm.covars_ = state_vars
        pyhmm.transmat_ = trans
        pyhmm.startprob_ = start

        # Reference E-step: forward/backward lattices -> posteriors -> stats.
        framelogprob = pyhmm._compute_log_likelihood(observations)
        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, observations, framelogprob, posteriors, fwdlattice,
            bwdlattice, 'stmc')

        # One lazily-evaluated check per statistic; the key is bound as a
        # default argument so each lambda keeps its own key.
        for key in ('trans', 'post', 'obs', 'obs**2'):
            yield lambda key=key: np.testing.assert_array_almost_equal(
                stats[key], cstats[key], decimal=3)
Example #33
0
def makeGaussHMM(d):
    """Normalise every entry of ``d`` in place and fit a GaussianHMM to it.

    Each ``d[i]`` is replaced by ``normalize(d[i])``, so the caller's
    container is modified. A 4-component ``GaussianHMM`` (``n_iter=10000``)
    is then fitted on ``d`` and the value returned by ``fit`` is handed back.
    """
    for index, sequence in enumerate(d):
        d[index] = normalize(sequence)
    model = GaussianHMM(4, n_iter=10000)
    return model.fit(d)
Example #34
0
    for i, row in enumerate(t[1:]):
        farm1.fill(i, row[0], row[1])
        farm2.fill(i, row[0], row[2])
        farm3.fill(i, row[0], row[3])
        farm4.fill(i, row[0], row[4])
        farm5.fill(i, row[0], row[5])
        farm6.fill(i, row[0], row[6])
        farm7.fill(i, row[0], row[7])

    model = GaussianHMM(algorithm='viterbi',
                        covariance_type='full',
                        covars_prior=0.01,
                        covars_weight=1,
                        means_prior=None,
                        means_weight=0,
                        n_components=5,
                        random_state=None,
                        startprob=None,
                        startprob_prior=1.0,
                        transmat=None,
                        transmat_prior=1.0)

    print "Fitting model..."

    model.fit([farm1.get_output()], n_iter=1000)

    print "Predicting hidden states..."
    hidden_states = model.predict(farm1.get_output())

    print "Transition matrix"
    print model.transmat_
        # save tagged sequence to file
        if save:
            filename = filenames[i].replace('.csv', '.tagged.csv')
            observation_sequences[i].save(save + os.sep + filename, include_state=True)
    return likelihood_of_training_data, observations_per_state

### PROTOTYPE ROUTINE

print "Loading training data..."
# Load observation sequences from CSV
observation_sequences, filenames = readObservationSequences(training_data, return_filenames=True)
training_sequences = [ observation_sequence.getNumpyArray() for observation_sequence in observation_sequences ]

print "Training multivariate Gaussian HMM (base model)..."
# Implements (1.), (2.)
base_model = GaussianHMM(n_states, covariance_type=covariance_type, n_iter=num_EM_iterations)
base_model.fit(training_sequences)
# save base model
print "\tSaving base model to file..."
saveModel(base_model, 'base_model', observation_sequences[0].getFeatureNames())
# tag training data using base model
print "\tTagging training data using base model..."
# Implements (3.), (4.)
likelihood_of_training_data, observations_per_state = tagTrainingData( base_model, 
                                                                       training_sequences, 
                                                                       list(observation_sequences), # pass a copy 
                                                                       save='base_model/tagged_training_data', 
                                                                       filenames=filenames )
print "\tTotal log lokelihood of the training data according to base model: %.4f" % likelihood_of_training_data

previous_model = base_model
# Script: convert raw source files to observation CSVs, train a Gaussian HMM
# on them, serialise the model and tag every observation with its state.
output_dir = "/Users/sam/Documents/ausbildung/uni/msc_ai/thesis/Models/MultivariateGaussianHMM/keyDown/A/P/"
adaptor = XMLAdaptorMultiWindow1()
# `source` is a glob pattern defined outside this excerpt.
for file_path in glob.glob(source):
    observation_sequence = adaptor.convert(file_path)
    # Only truthy conversion results are written out.
    if observation_sequence:
        observation_sequence.save(output_dir + "training_observations/" + os.path.basename(file_path) + '.csv')

print "Loading observations from CSV..."

# Load observation sequences from CSV
observation_sequences, filenames = readObservationSequences(output_dir + "training_observations/*.csv", return_filenames=True)
training_sequences = [ observation_sequence.getNumpyArray() for observation_sequence in observation_sequences ]

print "Training Multivariate Gaussian HMM model..."
n_components = 3
model = GaussianHMM(n_components, covariance_type="full", n_iter=10)
model.fit(training_sequences)

# save Gaussian HMM model to file
# The model directory is named after the number of states, e.g. "3states/".
model_dir = output_dir + '%sstates/' % n_components
mkdir_p(model_dir)
serialiser = HMMSerialiser(model, feature_names=adaptor.getFeatures())
serialiser.saveXML(model_dir + 'model.xml')
 
print "Tagging observation sequences..."
tagged_sequences_dir = model_dir + "tagged_sequences/"
mkdir_p(tagged_sequences_dir)
# training_sequences[i] was built from observation_sequences[i], so the j-th
# predicted state is attached to the j-th observation of the same sequence.
for i, training_sequence in enumerate(training_sequences):
    hidden_state_sequence = model.predict(training_sequence)
    for j, state in enumerate(hidden_state_sequence):
        # States are stored as labels "H0", "H1", ...
        observation_sequences[i].getObservation(j).setState( "H%s" % state )
Example #37
0
def main():
    """
    Main function that performs footprint analysis.

    Steps, in order:
      1. Parse the command line (exactly one positional argument: the
         experimental matrix file).
      2. Read the experimental matrix and fetch the region set, the DNase
         signal and the histone signals from it.
      3. Build one scikit GaussianHMM per HMM file (``--hmm-file``), or a
         single one from the default HMM file.
      4. For every extended region and every histone signal, run the HMM on
         the (DNase norm, DNase slope, histone norm, histone slope) sequence
         and collect runs of the footprint state as footprints.
      5. Merge the footprints, intersect them with the original regions and
         write ``<output-location>/<footprint-name>.bed`` (plus a ``.bb``
         file when ``--print-bb`` is given).

    Errors are reported through ``ErrorHandler``.
    NOTE(review): the code relies on ``throw_error`` not returning (e.g. it
    uses values right after an error branch) - confirm against ErrorHandler.

    Keyword arguments: None
        
    Return: None
    """
    # Local import: only this function needs it, and the module-level import
    # block is outside the scope of this change.
    from os.path import join as join_path

    ###################################################################################################
    # Processing Input Arguments
    ###################################################################################################

    # Initializing ErrorHandler
    error_handler = ErrorHandler()

    # Parameters
    current_version = "0.0.1"
    usage_message = (
        "\n--------------------------------------------------\n"
        "The 'hint' program predicts TFBSs given open chromatin data.\n"
        "In order to use this tools, please type: \n\n"
        "%prog [options] <experiment_matrix>\n\n"
        "The <experiment matrix> should contain:\n"
        "- One region file representing the regions in which the HMM\n"
        "  will be applied. It should contain 'regions' in the type field\n"
        "- One DNase aligned reads file (bam) file with 'DNASE' in the name field.\n"
        "- One to Three histone modification aligned reads file (bam).\n\n"
        "For more information, please refer to:\n"
        "http://www.regulatory-genomics.org/dnasefootprints/\n"
        "--------------------------------------------------")
    version_message = "HINT - Regulatory Analysis Toolbox (RGT). Version: " + str(
        current_version)

    # Initializing Option Parser
    parser = PassThroughOptionParser(usage=usage_message,
                                     version=version_message)

    # Optional Input Options
    # (help text: trailing spaces added so the concatenated literals no
    # longer run words together, e.g. "numberof", "thehistones")
    parser.add_option(
        "--hmm-file",
        dest="hmm_file",
        type="string",
        metavar="FILE_1[,FILE_2,...,FILE_N]",
        default=None,
        help=
        ("List of HMM files separated by comma. If one file only, then this HMM will be "
         "applied for all histone signals, otherwise, the list must have the same number "
         "of histone files given. The order of the list should be the order of the "
         "histones in the input_matrix file. If the argument is not given, then an HMM "
         "trained with H3K4me3 in K562 will be used."))

    # Parameters Options
    parser.add_option(
        "--organism",
        dest="organism",
        type="string",
        metavar="STRING",
        default="hg19",
        help=
        ("Organism considered on the analysis. Check our full documentation for all available "
         "options. All default files such as genomes will be based on the chosen organism "
         "and the data.config file. This option is used only if a bigbed output is asked."
         ))

    # Output Options
    parser.add_option("--output-location",
                      dest="output_location",
                      type="string",
                      metavar="PATH",
                      default=getcwd(),
                      help=("Path where the output files will be written."))
    parser.add_option("--footprint-name",
                      dest="footprint_name",
                      type="string",
                      metavar="STRING",
                      default="footprints",
                      help=("Name of the footprint file (without extension)."))
    parser.add_option(
        "--print-bb",
        dest="print_bb",
        action="store_true",
        default=False,
        help=("If used, the output will be a bigbed (.bb) file."))

    # Processing Options: exactly one positional argument is accepted
    options, arguments = parser.parse_args()
    if not arguments or len(arguments) > 1:
        error_handler.throw_error("FP_WRONG_ARGUMENT")

    # Fixed Parameters ################
    region_total_ext = 10000  # total extension applied to each region (half per side)
    fp_state_nb = 7  # HMM state value that is treated as "footprint"
    fp_limit_size = 50  # footprints closed inside a region must be shorter than this
    ###
    dnase_initial_clip = 1000
    dnase_sg_window_size = 9
    dnase_norm_per = 98
    dnase_slope_per = 98
    dnase_frag_ext = 1
    ###
    histone_initial_clip = 1000
    histone_sg_window_size = 201
    histone_norm_per = 98
    histone_slope_per = 98
    histone_frag_ext = 200
    ###################################

    ###################################################################################################
    # Reading Input Matrix
    ###################################################################################################

    # Reading input argument
    input_matrix = arguments[0]

    # Create experimental matrix
    try:
        exp_matrix = ExperimentalMatrix()
        exp_matrix.read(input_matrix)
    except Exception:
        error_handler.throw_error("FP_WRONG_EXPMAT")

    ###################################################################################################
    # Reading Regions
    ###################################################################################################

    # Fetching region file: none is an error, more than one only a warning
    # (the first region set is used).
    region_set_list = exp_matrix.get_regionsets()
    if len(region_set_list) == 0:
        error_handler.throw_error("FP_ONE_REGION")
    elif len(region_set_list) > 1:
        error_handler.throw_warning("FP_ONE_REGION")
    regions = region_set_list[0]

    # Extending + Sorting + Merging / keeping an original copy
    original_regions = deepcopy(regions)
    regions.extend(int(region_total_ext / 2),
                   int(region_total_ext / 2))  # Extending
    regions.merge()  # Sort & Merge

    ###################################################################################################
    # Reading Signals
    ###################################################################################################

    # Initialization
    name_list = exp_matrix.names
    type_list = exp_matrix.types
    file_dict = exp_matrix.files
    dnase_label = "DNASE"

    # Fetching signal files
    dnase_file = None
    histone_file_list = []
    for i, name in enumerate(name_list):
        if type_list[i] == "regions":
            continue
        if name.upper() == dnase_label:  # DNase signal
            if not dnase_file:
                dnase_file = BamFile(file_dict[name])
                dnase_file.load_sg_coefs(dnase_sg_window_size)
            else:
                # Only the first DNase entry is used.
                error_handler.throw_warning("FP_MANY_DNASE")
        else:  # Histone signal
            histone_file = BamFile(file_dict[name])
            histone_file.load_sg_coefs(histone_sg_window_size)
            histone_file_list.append(histone_file)

    # Handling errors
    if not dnase_file:
        error_handler.throw_error("FP_NO_DNASE")
    if len(histone_file_list) == 0:
        error_handler.throw_error("FP_NO_HISTONE")
    elif len(histone_file_list) > 3:
        error_handler.throw_warning("FP_MANY_HISTONE")

    ###################################################################################################
    # Creating HMM list
    ###################################################################################################

    # Fetching HMM input
    flag_multiple_hmms = False
    if options.hmm_file:  # Argument is passed

        # Fetching list of HMM files
        hmm_file_list = options.hmm_file.split(",")

        # Verifying HMM application mode (one HMM or multiple HMM files)
        if len(hmm_file_list) == 1:
            flag_multiple_hmms = False  # One HMM file only
        # Fixed: this compared against the undefined name
        # `histone_file_name_list`, raising NameError whenever more than one
        # HMM file was given.
        elif len(hmm_file_list) == len(histone_file_list):
            flag_multiple_hmms = True  # One HMM file for each histone
        else:
            error_handler.throw_error("FP_NB_HMMS")

    else:  # Argument was not passed
        flag_multiple_hmms = False
        hmm_data = HmmData()
        hmm_file_list = [hmm_data.get_default_hmm()]

    # Creating scikit HMM list
    hmm_list = []
    for hmm_file_name in hmm_file_list:

        try:
            hmm_scaffold = HMM()
            hmm_scaffold.load_hmm(hmm_file_name)
            scikit_hmm = GaussianHMM(n_components=hmm_scaffold.states,
                                     covariance_type="full",
                                     transmat=array(hmm_scaffold.A),
                                     startprob=array(hmm_scaffold.pi))
            scikit_hmm.means_ = array(hmm_scaffold.means)
            scikit_hmm.covars_ = array(hmm_scaffold.covs)
        except Exception:
            error_handler.throw_error("FP_HMM_FILES")
        hmm_list.append(scikit_hmm)

    ###################################################################################################
    # Main Pipeline
    ###################################################################################################

    # Initializing result set
    footprints = GenomicRegionSet("footprints")

    # Iterating over regions
    for r in regions.sequences:

        # Shared by every warning message emitted for this region.
        region_label = ",".join([r.chrom, str(r.initial), str(r.final)])

        # Fetching DNase signal
        try:
            dnase_norm, dnase_slope = dnase_file.get_signal(
                r.chrom, r.initial, r.final, dnase_frag_ext,
                dnase_initial_clip, dnase_norm_per, dnase_slope_per)
        except Exception:
            error_handler.throw_warning(
                "FP_DNASE_PROC",
                add_msg="for region (" + region_label +
                "). This iteration will be skipped.")
            continue

        # Iterating over histone modifications
        for i, histone_file in enumerate(histone_file_list):

            # Fetching histone signal
            try:
                histone_norm, histone_slope = histone_file.get_signal(
                    r.chrom, r.initial, r.final, histone_frag_ext,
                    histone_initial_clip, histone_norm_per, histone_slope_per)
            except Exception:
                error_handler.throw_warning(
                    "FP_HISTONE_PROC",
                    add_msg="for region (" + region_label +
                    ") and histone modification " + histone_file.file_name +
                    ". This iteration will be skipped for this histone.")
                continue

            # Formatting sequence: one row per position, four signal columns
            try:
                input_sequence = array(
                    [dnase_norm, dnase_slope, histone_norm, histone_slope]).T
            except Exception:
                error_handler.throw_warning(
                    "FP_SEQ_FORMAT",
                    add_msg="for region (" + region_label +
                    ") and histone modification " + histone_file.file_name +
                    ". This iteration will be skipped.")
                continue

            # Applying HMM (per-histone HMM if several were given)
            current_hmm = hmm_list[i] if flag_multiple_hmms else hmm_list[0]
            try:
                posterior_list = current_hmm.predict(input_sequence)
            except Exception:
                error_handler.throw_warning(
                    "FP_HMM_APPLIC",
                    add_msg="in region (" + region_label +
                    ") and histone modification " + histone_file.file_name +
                    ". This iteration will be skipped.")
                continue

            # Writing results: every maximal run of the footprint state
            # becomes one footprint [start_pos, k).
            start_pos = 0
            flag_start = False
            for k in range(r.initial, r.final):
                curr_index = k - r.initial
                if flag_start:
                    if posterior_list[curr_index] != fp_state_nb:
                        # Run just ended; keep it only if short enough.
                        if k - start_pos < fp_limit_size:
                            fp = GenomicRegion(r.chrom, start_pos, k)
                            footprints.add(fp)
                        flag_start = False
                else:
                    if posterior_list[curr_index] == fp_state_nb:
                        flag_start = True
                        start_pos = k
            # A run still open at the end of the region is closed at r.final.
            # NOTE(review): unlike above, no fp_limit_size check is applied
            # here - confirm this is intended.
            if flag_start:
                fp = GenomicRegion(r.chrom, start_pos, r.final)
                footprints.add(fp)

    # Sorting and Merging
    footprints.merge()

    # Overlapping results with original regions
    footprints = footprints.intersect(original_regions,
                                      mode=OverlapType.ORIGINAL)

    ###################################################################################################
    # Writing output
    ###################################################################################################

    # Creating output file.
    # Fixed: directory and file name were concatenated without a separator,
    # so the default location (getcwd(), no trailing slash) produced e.g.
    # "/home/userfootprints.bed".
    output_file_name = join_path(options.output_location,
                                 options.footprint_name + ".bed")
    footprints.write_bed(output_file_name)

    # Verifying condition to write bb
    if options.print_bb:

        # Fetching file with chromosome sizes
        genome_data = GenomeData(options.organism)
        chrom_sizes_file = genome_data.get_chromosome_sizes()

        # Converting to big bed (the .bed file is kept alongside the .bb)
        output_bb_name = join_path(options.output_location,
                                   options.footprint_name + ".bb")
        try:
            exit_status = system(" ".join([
                "bedToBigBed", output_file_name, chrom_sizes_file,
                output_bb_name
            ]))
        except Exception:
            error_handler.throw_error("FP_BB_CREATION")
        else:
            # system() reports a failing command through its exit status,
            # not through an exception, so the status must be checked.
            if exit_status != 0:
                error_handler.throw_error("FP_BB_CREATION")
Example #38
0
def test_1():
    """Yield one smoke check per model: fitting on random 2-D data must run.

    A 5-state ``VonMisesHMM`` and a 5-component ``GaussianHMM`` are each
    fitted, lazily, on the same single sequence of 100 random 2-feature
    samples.
    """
    von_mises = VonMisesHMM(n_states=5)
    gaussian = GaussianHMM(n_components=5)
    sequence = np.random.randn(100, 2)
    # The model is bound as a default argument so each lambda fits its own.
    for model in (von_mises, gaussian):
        yield lambda model=model: model.fit([sequence])
Example #39
0
def test_2():
    """Compare E-step sufficient statistics: CPU implementation vs GaussianHMM.

    Both models get the same random diagonal-covariance parameters (the CPU
    implementation as float32) and the same single random sequence. Four
    checks are yielded, one per statistic ('trans', 'post', 'obs',
    'obs**2'), each requiring agreement to 3 decimals.
    """
    n_features = 3
    length = 32

    for n_states in [4]:
        # Random data and parameters; the draw order is left untouched.
        seq = np.random.randn(length, n_features)
        mu = np.random.randn(n_states, n_features)
        sigma2 = np.random.rand(n_states, n_features)
        unnormalised_trans = np.random.rand(n_states, n_states)
        row_totals = np.sum(unnormalised_trans, axis=1)
        trans = unnormalised_trans / row_totals[:, None]
        unnormalised_start = np.random.rand(n_states)
        start = unnormalised_start / np.sum(unnormalised_start)

        chmm = GaussianHMMCPUImpl(n_states, n_features)
        chmm._sequences = [seq]

        pyhmm = GaussianHMM(
            n_components=n_states,
            init_params='',
            params='',
            covariance_type='diag',
        )

        # Implementation under test: single-precision copies.
        chmm.means_ = mu.astype(np.float32)
        chmm.vars_ = sigma2.astype(np.float32)
        chmm.transmat_ = trans.astype(np.float32)
        chmm.startprob_ = start.astype(np.float32)
        clogprob, cstats = chmm.do_estep()

        # Reference implementation: parameters as drawn.
        pyhmm.means_ = mu
        pyhmm.covars_ = sigma2
        pyhmm.transmat_ = trans
        pyhmm.startprob_ = start

        framelogprob = pyhmm._compute_log_likelihood(seq)
        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, seq, framelogprob, posteriors, fwdlattice, bwdlattice,
            'stmc')

        def make_check(key):
            # Deferred comparison of one statistic, run by the test runner.
            return lambda: np.testing.assert_array_almost_equal(
                stats[key], cstats[key], decimal=3)

        yield make_check('trans')
        yield make_check('post')
        yield make_check('obs')
        yield make_check('obs**2')
Example #40
0
# Script: train one HMM per class on the quantized training data, then
# quantize the test data in fixed-size row chunks.
quantized_set = np.asarray(quantized_set)
nclasses = len(np.unique(classlabels))
hmmclass = []  # hmmclass[i] is the model trained for class i
#print classlabels
print quantized_set.shape
# NOTE(review): comparing labels with the loop index assumes the labels are
# the integers 0..nclasses-1 - confirm against the data loader.
for i in range(0, nclasses):
    # Collect the columns of quantized_set whose label equals i.
    newtrainset = []
    for k in range(0, len(classlabels)):
        if classlabels[k] == i:
            #print i
            #print k
            newtrainset.append(quantized_set[:, k])

    newtrainset = np.asarray(newtrainset)
    #print newtrainset.shape
    hmm = HMM(64)
    hmm.fit([newtrainset])
    hmmclass.append(hmm)

#print testingset.shape
# Test rows are processed in chunks of datasample.shape[0] rows each.
rowdivision = datasample.shape[0]
t = []
for i in xrange(int(round(testingset.shape[0] / rowdivision))):
    t.append(
        quantize_data(testingset[rowdivision * i:rowdivision * (i + 1), :],
                      kmms))
#print t.shape
t = np.asarray(t)
rlabels = []
for ts in t:
    i = 0
# Script: fit a 2-state diagonal Gaussian HMM on the training features and
# decode the test features.
X = np.asarray(X_training).astype(float)
y = np.array(y_training).astype(float)
X_test = np.asarray(X_test).astype(float)
# pack the training features into a 2-D array for the HMM
X = np.column_stack([X])

###############################################################################
# Run Gaussian HMM
# (the trailing comma suppresses the newline under the Python 2 print
# statement; it is kept as in the original)
print("fitting to HMM and decoding ..."),
n_components = 2

# make an HMM instance and execute fit
model = GaussianHMM(n_components, "diag")
model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X_test)
# Show at most the first 50 predicted states. Slicing instead of indexing
# range(0, 50) avoids an IndexError when fewer than 50 were predicted.
for state in hidden_states[:50]:
    print(state),
print("done\n")

###############################################################################
# print trained parameters and plot
print("Transition matrix")
print (model.transmat_)
print ("")

print ("means and vars of each hidden state")
Example #42
0
# Take the first difference of the close values.
# this makes len(diff) = len(close_t) - 1
# therefore, others quantity also need to be shifted
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
# NOTE(review): `volume` is not shifted in this excerpt; presumably it was
# trimmed to the same length earlier - confirm.
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
# (trailing comma: Python 2 print statement without a newline)
print "fitting to HMM and decoding ...",
n_components = 5

# make an HMM instance and execute fit
model = GaussianHMM(n_components, "diag")
# the single sequence X is passed as a one-element list
model.fit([X], n_iter=1000)

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print "done\n"

###############################################################################
# print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
for i in xrange(n_components):
# Script: rescale the selected features and cluster the samples with a
# 20-component Gaussian HMM.
X_new = X_new * 1000

#n_features = sum(good_features2)
# number of feature columns after selection
n_features = X_new.shape[1]

print(n_features)
# # clf = svm.SVC()
# # clf.fit(X_new, y)

# Earlier experiment (disabled): MultinomialHMM fitted on runs of equal labels.
# hmm = MultinomialHMM()
# pos = np.where(np.diff(y) != 0)[0]
# d = np.hstack([0, pos+1, len(y)])
# lens = np.diff(d)
# hmm.fit(X_new, y, lens)

hmm = GaussianHMM(n_components=20)
# the whole feature matrix is treated as one observation sequence
hmm.fit([X_new])

# predicted hidden state per row, kept under both names
clusters = pred = hmm.predict(X_new)

# neigh = KNeighborsClassifier(n_neighbors=10, weights='distance')
# scores = cross_validation.cross_val_score(neigh, X_new, y, cv=5)
# print(scores)
# # neigh.fit(X_new, y)

# good_features = ETC.feature_importances_ >= 0.0005
# print(np.sum(good_features))
# X_new2 = X[..., good_features]

# n_features = 20
# pca = PCA(n_components = n_features)
        list_of_patient_feats, start_stop_idx, list_of_patient_file_paths = string_patient_feats(train_map, condition, overlap, window)
        #sirs_feats_stacked = stack_patient_feats(list_of_sirs_patients)
        feats_as_list = list_patient_feats(list_of_patient_feats)
        #print np.shape(sirs_feats_stacked)
        means, covs = get_initial_states(pre_states, condition, feature, end=False, start=False, cov_type=cov_type)
        print means
        print covs
        if cov_type == 'full':
            for i in range(n_states):
                print 'checking if initial covs are pos-definite'
                np.linalg.cholesky(covs[i])
                print np.linalg.eigvals(covs[i])
        tmat, smat = get_tmat_and_smat(pre_states, end=False, start=False)
        print tmat, smat
        model = GaussianHMM(n_components=n_states, n_iter=n_iter, covariance_type=cov_type, startprob=smat, transmat=tmat, init_params='mc')
        model.means_ = means
        model.covars_ = covs
        sum_inital_ll = 0.0
        sum_initial_score = 0.0
        sum_initial_map = 0.0
        remove_idx = []
        for idx, feat_from_list in enumerate(feats_as_list):
            if np.shape(feat_from_list)[0] > n_states:
                initial_ll, initial_best_seq = model.decode(feat_from_list)
                initial_map, initial_best_sep_map = model.decode(feat_from_list, algorithm='map')
                sum_initial_score += model.score(feat_from_list)
                sum_inital_ll += initial_ll
                sum_initial_map += initial_map
            else:
                remove_idx.append(idx)
Example #45
0
            trimmed_count = 0
        counts.append(trimmed_count)
        kmer_stash.append(kmer)
    i += 1

# Abort when filtering left nothing to model.
if not len(counts):
    sys.exit(
        "No k-mer counts remain after filtering; check thresholds and try again."
    )

## fit HMM to counts
if len(args.mu) != len(args.sigmasq):
    sys.exit("Vectors of prior means and variances must be same length.")

# log(1 + count), reshaped to a single-feature column for the HMM
counts = np.reshape(np.log1p(np.array(counts, dtype="int")), (-1, 1))
# Only the length of args.mu is used here (number of hidden states); the
# values of args.mu / args.sigmasq are not passed to the model in this excerpt.
hmm = GaussianHMM(len(args.mu))
hmm.fit([counts])

# NOTE: these messages are written after the fit has already completed.
if args.verbose:
    sys.stderr.write(
        "Fitting HMM to k-mer counts, assuming {} hidden states...\n".format(
            len(args.mu)))
    sys.stderr.write("means:\n" + str(hmm.means_) + "\n")
    sys.stderr.write("covariances:\n" + str(hmm.covars_) + "\n")
    sys.stderr.write("\n")
    sys.stderr.write("Processing possible variant sites...\n")
    sys.stderr.write("\trejecting haplotypes with read count < {}\n".format(
        args.maf))
    sys.stderr.write(
        "\taccepting as TE/ME any haplotype with max count > {}\n".format(
            args.maxhits))
Example #46
0
def train_hmm(X):
    hmm = GaussianHMM(n_components=8)
    hmm.fit(X);
    print hmm.score(X[0])
    print np.shape(X[0])
    return hmm
	volume = []
	for row in data:
		if row[1] != 'close':
			#list = []
			#for i in range(len(row)-2):
			#	list.append(float(row[i+1]))
			label = float(row[7])
			volume.append(float(row[2]))
			if label > 0:
				indices.append(1)
			else:
				indices.append(0)
			#matrix.append(list)

X = numpy.column_stack([numpy.array(indices), numpy.array(volume)])
model = GaussianHMM(2, covariance_type="diag", n_iter=1000)

model.fit([X])


"""
reading the dato to be classified
"""
with open('hackathon-master/AAPL-test.csv', 'rb') as csvfile:
	data = csv.reader(csvfile, delimiter=',')
	#matrix = []
	volume = []
	labels = []
	for row in data:
		if row[1] != 'close':
			list = []
Example #48
0
# Take the first difference of the close values.
# this makes len(diff) = len(close_t) - 1
# therefore, others quantity also need to be shifted
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
# NOTE(review): `volume` is not shifted in this excerpt; presumably it was
# trimmed to the same length earlier - confirm.
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
# (trailing comma: Python 2 print statement without a newline)
print "fitting to HMM and decoding ...",
n_components = 2

# make an HMM instance and execute fit
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

# the single sequence X is passed as a one-element list
model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print "done\n"

###############################################################################
# print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
    volume = []
    for row in data:
        if row[1] != 'close':
            #list = []
            #for i in range(len(row)-2):
            #	list.append(float(row[i+1]))
            label = float(row[7])
            volume.append(float(row[2]))
            if label > 0:
                indices.append(1)
            else:
                indices.append(0)
            #matrix.append(list)

X = numpy.column_stack([numpy.array(indices), numpy.array(volume)])
model = GaussianHMM(2, covariance_type="diag", n_iter=1000)

model.fit([X])
"""
reading the dato to be classified
"""
with open('hackathon-master/AAPL-test.csv', 'rb') as csvfile:
    data = csv.reader(csvfile, delimiter=',')
    #matrix = []
    volume = []
    labels = []
    for row in data:
        if row[1] != 'close':
            list = []
            volume.append(float(row[2]))
            #for i in range(len(row)-2):
Example #50
0
def test_2():
    """Check the CUDA Gaussian HMM E-step against the reference GaussianHMM.

    Generator-style test: every yielded item is a zero-argument callable that
    asserts two arrays agree to 3 decimal places.  Both implementations get
    the same random means, variances, transition matrix and start
    probabilities and are compared on the frame log-likelihoods, the forward
    and backward lattices, the posteriors and the accumulated sufficient
    statistics ('trans', 'post', 'obs', 'obs**2').
    """
    def _check(expected, actual):
        # Bind both arrays when the check is created.  A bare lambda would
        # look the names up only when called, which is wrong as soon as the
        # runner collects all checks before running them and the loop below
        # has rebound its locals.
        return lambda: np.testing.assert_array_almost_equal(
            expected, actual, decimal=3)

    np.random.seed(42)
    n_features = 32
    length = 20

    #for n_states in [3, 4, 5, 7, 8, 9, 15, 16, 17, 31, 32]:
    for n_states in [8]:
        # Random observations and model parameters; the order of the draws
        # is kept so the seeded values stay the same.
        t1 = np.random.randn(length, n_features)
        means = np.random.randn(n_states, n_features)
        variances = np.random.rand(n_states, n_features)
        transmat = np.random.rand(n_states, n_states)
        # Normalise each row / the vector so they sum to one.
        transmat = transmat / np.sum(transmat, axis=1)[:, None]
        startprob = np.random.rand(n_states)
        startprob = startprob / np.sum(startprob)

        cuhmm = GaussianHMMCUDAImpl(n_states, n_features)
        cuhmm._sequences = [t1]

        # Empty init_params/params: the parameters are assigned by hand below.
        pyhmm = GaussianHMM(n_components=n_states,
                            init_params='',
                            params='',
                            covariance_type='diag')
        cuhmm.means_ = means
        cuhmm.vars_ = variances
        cuhmm.transmat_ = transmat
        cuhmm.startprob_ = startprob
        _, custats = cuhmm.do_estep()

        pyhmm.means_ = means
        pyhmm.covars_ = variances
        pyhmm.transmat_ = transmat
        pyhmm.startprob_ = startprob
        # NOTE(review): the return value is discarded here, as in the
        # original; the statistics actually used are created further down.
        pyhmm._initialize_sufficient_statistics()

        framelogprob = pyhmm._compute_log_likelihood(t1)
        cuframelogprob = cuhmm._get_framelogprob()
        yield _check(framelogprob, cuframelogprob)

        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        cufwdlattice = cuhmm._get_fwdlattice()
        yield _check(fwdlattice, cufwdlattice)

        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        cubwdlattice = cuhmm._get_bwdlattice()
        yield _check(bwdlattice, cubwdlattice)

        # Posteriors: exponentiate the row-normalised sum of both lattices.
        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        cuposteriors = cuhmm._get_posteriors()
        yield _check(posteriors, cuposteriors)

        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(stats, t1, framelogprob,
                                                posteriors, fwdlattice,
                                                bwdlattice, 'stmc')

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('ref transcounts')
        print(transitioncounts(cufwdlattice, cubwdlattice, cuframelogprob,
                               np.log(transmat)))
        print('cutranscounts')
        print(custats['trans'])

        for key in ('trans', 'post', 'obs', 'obs**2'):
            yield _check(stats[key], custats[key])
Example #51
0
class GaussianHmmLib:
    """
    Fit a Gaussian HMM to the close-price differences and volume of one
    stock, then print the fitted parameters and plot the decoded states.

    ref: http://scikit-learn.org/0.14/auto_examples/applications/plot_hmm_stock_analysis.html
    https://www.quantopian.com/posts/inferring-latent-states-using-a-gaussian-hidden-markov-model
    bear market: smaller mean, higher variance
    bull market: higher mean, smaller variance
    """
    def __init__(self, dbhandler, *args, **kwargs):
        """Store the handler and read the model options from ``kwargs``.

        dbhandler: object exposing ``stock.ids``; the ids are kept in
            ``self.sids``.
        n_components (keyword, optional): number of hidden states; 5 when
            missing or falsy.
        n_iter (keyword, optional): iteration count passed to the model;
            1000 when missing or falsy.

        Positional ``*args`` are accepted but not used.
        """
        self.dbhandler = dbhandler
        self.sids = self.dbhandler.stock.ids
        # pop() needs its own default: without one a missing option raises
        # KeyError and the `or <default>` fallback is never reached.
        self.n_components = int(kwargs.pop('n_components', 0) or 0) or 5
        self.n_iter = int(kwargs.pop('n_iter', 0) or 0) or 1000

    def run(self, data):
        """Fit the HMM on the first stock id and decode its hidden states.

        data: mapping indexed by stock id whose entries expose 'price',
            'close_v' and 'volume' columns with a ``.values`` array.

        Sets ``dates``, ``close_v``, ``volume``, ``diff``, ``X``, ``model``
        and ``hidden_states`` on the instance.
        """
        sid = self.sids[0]
        # NOTE(review): the dates are read from the 'price' column -- confirm
        # that this column really holds the dates used for plotting.
        self.dates = data[sid]['price'].values
        self.close_v = data[sid]['close_v'].values
        # Drop the first volume so it lines up with the differences below.
        self.volume = data[sid]['volume'].values[1:]

        # take diff of close value
        # this makes len(diff) = len(close_v) - 1
        self.diff = self.close_v[1:] - self.close_v[:-1]

        # pack diff and volume for training
        self.X = np.column_stack([self.diff, self.volume])

        # make an HMM instance and execute fit
        self.model = GaussianHMM(self.n_components,
                                 covariance_type="diag",
                                 n_iter=self.n_iter)
        # n_iter is already set on the constructor, so it is not repeated
        # in the fit() call.
        self.model.fit([self.X])

        # predict the optimal sequence of internal hidden state
        self.hidden_states = self.model.predict(self.X)

    def report(self):
        """Print the fitted parameters and save a per-state scatter plot.

        Must be called after ``run``.  The figure is written to
        ``gaussianhmm_<first stock id>.png``.
        """
        # print trained parameters and plot
        # (single-argument print(...) behaves the same on Python 2 and 3)
        print("Transition matrix")
        print(self.model.transmat_)
        print("")

        print("means and vars of each hidden state")
        for i in range(self.n_components):
            print("%dth hidden state" % i)
            print("mean =  %s" % (self.model.means_[i],))
            print("var =  %s" % (np.diag(self.model.covars_[i]),))
            print("")

        years = YearLocator()  # every year
        months = MonthLocator()  # every month
        yearsFmt = DateFormatter('%Y')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hidden_states has one entry per row of X, i.e. per close-price
        # difference, so it is one element shorter than dates / close_v.
        # Drop the first element of both so the state masks line up.
        dates = self.dates[1:]
        close_v = self.close_v[1:]

        for i in range(self.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot_date(dates[idx],
                         close_v[idx],
                         'o',
                         label="%dth hidden state" % i)
        ax.legend()

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(yearsFmt)
        ax.xaxis.set_minor_locator(months)
        ax.autoscale_view()

        # format the coords message box
        ax.fmt_xdata = DateFormatter('%Y-%m-%d')
        ax.fmt_ydata = lambda x: '$%1.2f' % x
        ax.grid(True)

        fig.autofmt_xdate()
        plt.savefig("gaussianhmm_%s.png" % (self.sids[0]))
Example #52
0
def test_2():
    """Check the CUDA Gaussian HMM E-step against the reference GaussianHMM.

    Generator-style test: every yielded item is a zero-argument callable that
    asserts two arrays agree to 3 decimal places.  Both implementations get
    the same random means, variances, transition matrix and start
    probabilities and are compared on the frame log-likelihoods, the forward
    and backward lattices, the posteriors and the accumulated sufficient
    statistics ('trans', 'post', 'obs', 'obs**2').
    """
    def _check(expected, actual):
        # Bind both arrays when the check is created.  A bare lambda would
        # look the names up only when called, which is wrong as soon as the
        # runner collects all checks before running them and the loop below
        # has rebound its locals.
        return lambda: np.testing.assert_array_almost_equal(
            expected, actual, decimal=3)

    np.random.seed(42)
    n_features = 32
    length = 20

    #for n_states in [3, 4, 5, 7, 8, 9, 15, 16, 17, 31, 32]:
    for n_states in [8]:
        # Random observations and model parameters; the order of the draws
        # is kept so the seeded values stay the same.
        t1 = np.random.randn(length, n_features)
        means = np.random.randn(n_states, n_features)
        variances = np.random.rand(n_states, n_features)
        transmat = np.random.rand(n_states, n_states)
        # Normalise each row / the vector so they sum to one.
        transmat = transmat / np.sum(transmat, axis=1)[:, None]
        startprob = np.random.rand(n_states)
        startprob = startprob / np.sum(startprob)

        cuhmm = GaussianHMMCUDAImpl(n_states, n_features)
        cuhmm._sequences = [t1]

        # Empty init_params/params: the parameters are assigned by hand below.
        pyhmm = GaussianHMM(n_components=n_states, init_params='', params='',
                            covariance_type='diag')
        cuhmm.means_ = means
        cuhmm.vars_ = variances
        cuhmm.transmat_ = transmat
        cuhmm.startprob_ = startprob
        _, custats = cuhmm.do_estep()

        pyhmm.means_ = means
        pyhmm.covars_ = variances
        pyhmm.transmat_ = transmat
        pyhmm.startprob_ = startprob
        # NOTE(review): the return value is discarded here, as in the
        # original; the statistics actually used are created further down.
        pyhmm._initialize_sufficient_statistics()

        framelogprob = pyhmm._compute_log_likelihood(t1)
        cuframelogprob = cuhmm._get_framelogprob()
        yield _check(framelogprob, cuframelogprob)

        fwdlattice = pyhmm._do_forward_pass(framelogprob)[1]
        cufwdlattice = cuhmm._get_fwdlattice()
        yield _check(fwdlattice, cufwdlattice)

        bwdlattice = pyhmm._do_backward_pass(framelogprob)
        cubwdlattice = cuhmm._get_bwdlattice()
        yield _check(bwdlattice, cubwdlattice)

        # Posteriors: exponentiate the row-normalised sum of both lattices.
        gamma = fwdlattice + bwdlattice
        posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
        cuposteriors = cuhmm._get_posteriors()
        yield _check(posteriors, cuposteriors)

        stats = pyhmm._initialize_sufficient_statistics()
        pyhmm._accumulate_sufficient_statistics(
            stats, t1, framelogprob, posteriors, fwdlattice,
            bwdlattice, 'stmc')

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('ref transcounts')
        print(transitioncounts(cufwdlattice, cubwdlattice, cuframelogprob,
                               np.log(transmat)))
        print('cutranscounts')
        print(custats['trans'])

        for key in ('trans', 'post', 'obs', 'obs**2'):
            yield _check(stats[key], custats[key])
Example #53
0
""" agelessmojo bot implementation """
from __future__ import division  # for floating point division
import os, json
import numpy as np
from sklearn.hmm import GaussianHMM
from bottle import get, post, request, run, response

# Listening port and host, read from the environment.
# os.getenv returns None when a variable is not set.
PORT = os.getenv('VCAP_APP_PORT')
HOST = os.getenv('VCAP_APP_HOST')

# Module-wide Gaussian HMM with 50 components; "diag" is passed as the second
# positional argument (presumably covariance_type -- confirm against the
# installed sklearn.hmm version).
HMM = GaussianHMM(50, "diag")

# TODO Variables here

WINDOW_SIZE = 5

# Module-level mutable state, all starting out empty / at zero.
# NOTE(review): how these are filled and consumed is not visible in this
# excerpt.
LAP_DATA = {}
LAP_DATA_SMOOTHED = {}
LAP_COUNT = 0
LAP_ITERATOR = 0


@get('/ping')
def ping():
    """Health-check endpoint: answer with the plain-text body "success"."""
    reply_headers = response.headers
    reply_headers['Content-Type'] = 'text/plain'
    return "success"


def send_power_control(power):
Example #54
0
matrix with some extra bells and whistles. But, because of the way the E-step works
currently, the means and covariances are estimated exactly as with a Gaussian HMM.

Then, afterwards, the A, b and Q are estimated. So, we can do a lot of testing
by comparing to a reference gaussian HMM implementation
'''
import string
import numpy as np
from sklearn.hmm import GaussianHMM
from sklearn.utils.extmath import logsumexp
from mixtape.mslds import MetastableSwitchingLDS
from mixtape import _switching_var1

# Number of hidden states of the reference model.
N_STATES = 2
# Two random observation sequences of 100 frames with 3 features each.
# NOTE(review): the RNG is not seeded here, so the data differ between runs.
data = [np.random.randn(100, 3), np.random.randn(100, 3)]
# Reference Gaussian HMM with full covariance matrices, fitted on both
# sequences at import time.
refmodel = GaussianHMM(n_components=N_STATES, covariance_type='full').fit(data)


def _sklearn_estep():
    # copied from sklearn/hmm.py#L440
    curr_logprob = 0
    stats = refmodel._initialize_sufficient_statistics()
    stats['post[1:]'] = np.zeros(refmodel.n_components)
    stats['post[:-1]'] = np.zeros(refmodel.n_components)
    stats['obs[1:]'] = np.zeros((refmodel.n_components, refmodel.n_features))
    stats['obs[:-1]'] = np.zeros((refmodel.n_components, refmodel.n_features))
    stats['obs*obs[t-1].T'] = np.zeros(
        (refmodel.n_components, refmodel.n_features, refmodel.n_features))
    stats['obs[1:]*obs[1:].T'] = np.zeros(
        (refmodel.n_components, refmodel.n_features, refmodel.n_features))
    stats['obs[:-1]*obs[:-1].T'] = np.zeros(
Example #55
0
## initialize hmm parameters
rs = check_random_state(None)  # fix RNG seed? maybe?

# One row of means per hidden state and one column per observed count
# column, expressed on the log1p scale used for the observations below.
means = np.array([[0.0, 0.0], [np.log1p(args.coverage), 0.0],
                  [0.0, np.log1p(args.coverage)],
                  [np.log1p(args.coverage / 2),
                   np.log1p(args.coverage / 2)],
                  [np.log1p(args.coverage),
                   np.log1p(args.coverage)]])
# Per-state, per-column variances, laid out like `means`.
cv = 1.0
covars = np.array([[0.01, 0.01], [cv, 0.01], [0.01, cv], [cv / 2, cv / 2],
                   [cv, cv]])
# Labels for the hidden states.
# NOTE(review): this matches the five rows of `means` only if ref_samples
# holds exactly two names -- confirm against the caller.
hidden = ["private"] + ref_samples + ["heterozygous", "pseudohet"]

# The model is not fitted: means and covariances are set directly through
# the private setters.
hmm = GaussianHMM(n_components=len(means), random_state=rs)
hmm._set_means(means)
hmm._set_covars(covars)

## filter sites; compute observation sequence as log(1+count)
# Keep rows whose largest count is below X_max * coverage and whose row sum
# is greater than -1.0.
keep = np.logical_and((counts.max(1) < args.X_max * args.coverage),
                      (counts.sum(1) > -1.0))
counts = counts[keep, :]
obs = np.log1p(counts)
# Interval start positions as a column vector, filtered with the same mask.
starts = np.array([start for start, end in ivls]).reshape((len(ivls), 1))
starts = starts[keep, :]

## run hmm
# NOTE(review): in scikit-learn's hmm API decode() returns a
# (logprob, state_sequence) pair -- confirm the code below unpacks it.
states = hmm.decode(obs)

## print result to stdout