def __init__(self, preprocess_args, metric, graph_structure_type, A, B, pi,
             obs_bins, win_len, thresh, min_peak_dist):
    """Store the configuration and build the discrete emission objects.

    Args:
        preprocess_args: preprocessing settings; stored unchanged.
        metric: metric setting; stored unchanged.
        graph_structure_type: "predefined", "fully" or "left_to_right".
        A: initial hidden states graph.
        B: initial hidden states distribution; the length of its first
            row is used as the number of emission symbols.
        pi: initial hidden states probabilities.
        obs_bins: bins used in the hidden states distribution.
        win_len: window length of the offline sliding window.
        thresh: in the peak detection, only peaks greater than this
            value are detected.
        min_peak_dist: in the peak detection, detected peaks are
            separated by at least this distance.
    """
    # Caller-supplied settings are kept verbatim on the instance.
    self.preprocess_args = preprocess_args
    self.metric = metric
    self.graph_structure_type = graph_structure_type

    # Initial model parameters.
    self.A = A
    self.B = B
    self.pi = pi

    # Observation binning and peak-detection settings.
    self.obs_bins = obs_bins
    self.win_len = win_len
    self.thresh = thresh
    self.min_peak_dist = min_peak_dist

    # The first row of B fixes how many symbols the alphabet holds.
    symbol_count = len(self.B[0])
    alphabet = ghmm.IntegerRange(0, symbol_count)
    self.emission_domain = alphabet
    self.emission_distr = ghmm.DiscreteDistribution(alphabet)
def _generate_alphabet(self, num_symbols):
    """Build the simple integer alphabet [0:num_symbols-1].

    :param num_symbols: The number of different qsr symbols
    :return: The ghmm integer range object to be used as an alphabet
    """
    first_symbol = 0
    alphabet = gh.IntegerRange(first_symbol, num_symbols)
    return alphabet
def decodeHMM(m, hmmStates):
    '''
    Decode every chromosome's sequence with the HMM, in place.

    m: model object; its viterbi() method is called once per chromosome.
    hmmStates: mapping from chromosome to its observation sequence. Each
        sequence is wrapped in an EmissionSequence over IntegerRange(0, 2)
        and the entry is then overwritten with the first element of the
        pair returned by m.viterbi().

    Progress messages go to stderr. Nothing is returned.
    '''
    print >> sys.stderr, printTime(), "Decode HMM for each chromosome."
    alphabet = ghmm.IntegerRange(0, 2)
    for chrom in hmmStates:
        print >> sys.stderr, printTime(), chrom
        observed = ghmm.EmissionSequence(alphabet, hmmStates[chrom])
        # Only the first element of the result is kept; the second is
        # discarded.
        decoded, _score = m.viterbi(observed)
        hmmStates[chrom] = decoded
    print >> sys.stderr, printTime(), "Decode HMM finished."
    print >> sys.stderr
def setUp(self):
    '''Create a simple dice rolling HMM.

    The fixture has two states: row 0 of B is the fair die (uniform over
    six symbols) and row 1 is the loaded die. The alphabet, matrices and
    the resulting model are stored on self as sigma, A, B, pi and m.
    '''
    die_faces = g.IntegerRange(1, 7)
    fair_die = [1.0 / 6] * 6
    # Loaded-die weights are given in thirteenths.
    loaded_die = [weight / 13 for weight in (3.0, 3.0, 2.0, 2.0, 2.0, 1.0)]

    self.sigma = die_faces
    self.A = [
        [0.9, 0.1],
        [0.3, 0.7],
    ]
    self.B = [fair_die, loaded_die]
    self.pi = [0.5, 0.5]
    self.m = g.HMMFromMatrices(
        self.sigma,
        g.DiscreteDistribution(self.sigma),
        self.A,
        self.B,
        self.pi,
    )
def trainHMM(hmmState):
    '''
    Build a two-state discrete HMM and train it on one chromosome.

    hmmState: observation sequence for the chromosome; it is wrapped in an
        EmissionSequence over IntegerRange(0, 2) and passed to baumWelch().

    Progress messages go to stderr. Returns the trained model object.
    '''
    print >> sys.stderr, printTime(), "Train HMM with one chromosome."
    alphabet = ghmm.IntegerRange(0, 2)  # 0, 1
    transitions = [
        [0.9, 0.1],
        [0.1, 0.9],
    ]
    # Row 0 favours symbol 0, row 1 favours symbol 1.
    emissions = [
        [0.9, 0.1],
        [0.1, 0.9],
    ]
    start_probs = [0.9, 0.1]  # initial 10% are peak?
    model = ghmm.HMMFromMatrices(
        alphabet,
        ghmm.DiscreteDistribution(alphabet),
        transitions,
        emissions,
        start_probs,
    )
    training_data = ghmm.EmissionSequence(alphabet, hmmState)
    model.baumWelch(training_data)
    print >> sys.stderr, printTime(), "Train HMM finished."
    print >> sys.stderr
    return model
# %%
import ghmm

# %%
# Build a two-state HMM over a six-symbol alphabet and train it on a single
# observation sequence.
sigma = ghmm.IntegerRange(1, 7)
train_seq = ghmm.SequenceSet(
    sigma,
    [[1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1]])

A = [[0.99, 0.01], [0.99, 0.01]]
# Build each emission row separately so the two states never share one
# list object.
B = [[1.0 / 6] * 6 for _ in range(2)]
pi = [0.5] * 2
m = ghmm.HMMFromMatrices(sigma, ghmm.DiscreteDistribution(sigma), A, B, pi)

# Second and third arguments are the step limit and the convergence cutoff
# (nrSteps / loglikelihoodCutoff in the ghmm API).
m.baumWelch(train_seq, 100000000, 0.000000000000001)
print(m.asMatrices())

# %%
# Draw a sample from the trained model and show it in external symbols.
# NOTE(review): test_seq is assumed to be this sampled sequence; it had no
# assignment anywhere in the script. Replace it with held-out observations
# if that was the intent.
test_seq = m.sampleSingle(20)
print(list(map(sigma.external, test_seq)))

# %%
v = m.viterbi(test_seq)
print(v)

# %%
my_seq = ghmm.EmissionSequence(sigma, [1] * 20 + [6] * 10 + [1] * 40)
print(m.viterbi(my_seq))
def generateAlphabet(self, num_symbols):
    """Return the ghmm integer alphabet for ``num_symbols`` symbols.

    :param num_symbols: upper bound passed to ``gh.IntegerRange``; the
        lower bound is always 0
    :return: the ``gh.IntegerRange(0, num_symbols)`` object
    """
    alphabet = gh.IntegerRange(0, num_symbols)
    return alphabet
total_grams = 0 total_above_threshold = 0 total_threshold_correct = 0 for i, split in enumerate(kf.split(event_list)): event_list = np.array(event_list) train_data_raw = event_list[split[0]] test_data_raw = event_list[split[1]] train_data, train_vocab = prepare_data(train_data_raw) test_data, test_vocab = prepare_data(test_data_raw) # Create and train model vocab_len = max(train_vocab) + 1 sigma = ghmm.IntegerRange(0, vocab_len) # Emission range # Transition Matrix A = calculate_transition_probabilities(n_components) # Emission Probabilities B = calculate_emission_probabilities(train_data, n_components, vocab_len) # Initial State Distribution pi = [ 1.0 / n_components ] * n_components # Equally distribute the starting probabilities m = ghmm.HMMFromMatrices(sigma, ghmm.DiscreteDistribution(sigma), A,