예제 #1
0
    def __init__(self, preprocess_args, metric, graph_structure_type, A, B, pi,
                 obs_bins, win_len, thresh, min_peak_dist):
        """Store the configuration and build the ghmm emission objects.

        Args:
            preprocess_args: stored unchanged on the instance
            metric: stored unchanged on the instance
            graph_structure_type: "predefined", "fully" or "left_to_right"
            A: initial hidden states graph
            B: initial hidden states distribution; the length of its first
               row is taken as the number of emission symbols
            pi: initial hidden states probabilities
            obs_bins: bins used in the hidden states distribution
            win_len: window length of the offline sliding window
            thresh: peak detection keeps peaks that are greater than thresh
            min_peak_dist: peak detection keeps peaks separated by at least
                           this minimum peak distance
        """
        # Pre-processing and scoring configuration.
        self.preprocess_args, self.metric = preprocess_args, metric

        # Structure and initial parameters of the hidden Markov model.
        self.graph_structure_type = graph_structure_type
        self.A, self.B, self.pi = A, B, pi

        # Observation binning, sliding window and peak detection settings.
        self.obs_bins, self.win_len = obs_bins, win_len
        self.thresh, self.min_peak_dist = thresh, min_peak_dist

        # Integer alphabet starting at 0, one symbol per column of B's
        # first row.
        num_symbols = len(self.B[0])
        symbol_range = ghmm.IntegerRange(0, num_symbols)
        self.emission_domain = symbol_range
        self.emission_distr = ghmm.DiscreteDistribution(symbol_range)
예제 #2
0
    def _generate_alphabet(self, num_symbols):
        """Build the integer alphabet covering symbols 0 .. num_symbols - 1.

        :param num_symbols: The number of different qsr symbols

        :return: The ghmm integer range object to be used as an alphabet
        """
        alphabet = gh.IntegerRange(0, num_symbols)
        return alphabet
예제 #3
0
파일: wlncRNA.py 프로젝트: tomkp75/ngslib
 def decodeHMM(m, hmmStates):
     ''' Viterbi-decode every chromosome with the model m.

     hmmStates is indexed by chromosome; each entry holds that chromosome's
     observation sequence and is overwritten in place with the decoded state
     path. Progress messages are written to stderr. Nothing is returned.
     '''
     print >> sys.stderr, printTime(), "Decode HMM for each chromosome."
     alphabet = ghmm.IntegerRange(0, 2)
     for chrom in hmmStates:
         print >> sys.stderr, printTime(), chrom
         observations = ghmm.EmissionSequence(alphabet, hmmStates[chrom])
         # viterbi() yields a two-item result; only the first item (the
         # state path) is kept.
         path, _ = m.viterbi(observations)
         hmmStates[chrom] = path
     print >> sys.stderr, printTime(), "Decode HMM finished."
     print >> sys.stderr
예제 #4
0
 def setUp(self):
     '''Build a two-state dice rolling HMM (fair and loaded die).

     The alphabet, transition matrix, emission matrix, start probabilities
     and the resulting model are stored on the instance as sigma, A, B, pi
     and m.
     '''
     self.sigma = g.IntegerRange(1, 7)
     self.A = [
         [0.9, 0.1],
         [0.3, 0.7],
     ]
     # Emission probabilities for the six symbols of each state.
     fair_die = [1.0 / 6 for _ in range(6)]
     loaded_die = [weight / 13 for weight in (3.0, 3.0, 2.0, 2.0, 2.0, 1.0)]
     self.B = [fair_die, loaded_die]
     self.pi = [0.5, 0.5]
     distribution = g.DiscreteDistribution(self.sigma)
     self.m = g.HMMFromMatrices(self.sigma, distribution, self.A, self.B,
                                self.pi)
예제 #5
0
파일: wlncRNA.py 프로젝트: tomkp75/ngslib
 def trainHMM(hmmState):
     ''' Fit a two-state, two-symbol HMM to one chromosome with Baum-Welch.

     hmmState is the observation sequence of the chromosome (symbols 0 and
     1). Progress messages are written to stderr and the trained model is
     returned.
     '''
     print >> sys.stderr, printTime(), "Train HMM with one chromosome."
     alphabet = ghmm.IntegerRange(0, 2)  # 0, 1
     transitions = [
         [0.9, 0.1],
         [0.1, 0.9],
     ]
     emissions = [
         [0.9, 0.1],  # first state
         [0.1, 0.9],  # second state
     ]
     start_probs = [0.9, 0.1]  # initial 10% are peak?
     distribution = ghmm.DiscreteDistribution(alphabet)
     model = ghmm.HMMFromMatrices(alphabet, distribution, transitions,
                                  emissions, start_probs)
     observations = ghmm.EmissionSequence(alphabet, hmmState)
     model.baumWelch(observations)
     print >> sys.stderr, printTime(), "Train HMM finished."
     print >> sys.stderr
     return model
예제 #6
0
# %%
import ghmm

# %%
# Integer alphabet with the symbols 1..6.
sigma = ghmm.IntegerRange(1, 7)

# A single training sequence that only uses the symbols 1 and 3.
train_seq = ghmm.SequenceSet(
    sigma,
    [[1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1]])

# Initial transition matrix of the two hidden states.
A = [[0.99, 0.01], [0.99, 0.01]]

# Uniform initial emissions; each state gets its own row list so the two
# rows are not aliases of one another.
B = [[1.0 / 6] * 6 for _ in range(2)]

pi = [0.5] * 2

m = ghmm.HMMFromMatrices(sigma, ghmm.DiscreteDistribution(sigma), A, B, pi)

# Baum-Welch re-estimation; the extra positional arguments are the step
# limit and the log-likelihood cutoff (per the GHMM API -- verify against
# the installed version).
m.baumWelch(train_seq, 100000000, 0.000000000000001)

print(m.asMatrices())
# %%
# list() keeps the output readable when map() returns a lazy iterator.
print(list(map(sigma.external, m.sampleSingle(20))))
# %%
# `test_seq` was never defined in this script, so the cell failed with a
# NameError; decode a fresh sample drawn from the trained model instead.
test_seq = m.sampleSingle(20)
v = m.viterbi(test_seq)
print(v)

# %%
my_seq = ghmm.EmissionSequence(sigma, [1] * 20 + [6] * 10 + [1] * 40)
print(m.viterbi(my_seq))
예제 #7
0
 def generateAlphabet(self, num_symbols):
     '''Return the ghmm integer alphabet for symbols 0 .. num_symbols - 1.'''
     alphabet = gh.IntegerRange(0, num_symbols)
     return alphabet
예제 #8
0
            total_grams = 0
            total_above_threshold = 0
            total_threshold_correct = 0

            for i, split in enumerate(kf.split(event_list)):
                event_list = np.array(event_list)

                train_data_raw = event_list[split[0]]
                test_data_raw = event_list[split[1]]

                train_data, train_vocab = prepare_data(train_data_raw)
                test_data, test_vocab = prepare_data(test_data_raw)

                # Create and train model
                vocab_len = max(train_vocab) + 1
                sigma = ghmm.IntegerRange(0, vocab_len)  # Emission range

                # Transition Matrix
                A = calculate_transition_probabilities(n_components)

                # Emission Probabilities
                B = calculate_emission_probabilities(train_data, n_components,
                                                     vocab_len)

                # Initial State Distribution
                pi = [
                    1.0 / n_components
                ] * n_components  # Equally distribute the starting probabilities

                m = ghmm.HMMFromMatrices(sigma,
                                         ghmm.DiscreteDistribution(sigma), A,