def get_vntr_matcher_hmm(self, read_length):
    """Return the matcher HMM of this VNTR for the given read length.

    When ``settings.USE_TRAINED_HMMS`` is set and a stored model file for
    this VNTR id and read length exists under ``settings.TRAINED_HMMS_DIR``,
    the model is loaded from that file. Otherwise a new model is built with
    ``build_vntr_matcher_hmm``, its JSON form is written to that file for
    later runs, and the new model is returned.
    """
    logging.info('Using read length %s' % read_length)
    copies = self.get_copies_for_hmm(read_length)

    # One stored file per (reference VNTR id, read length) pair.
    file_name = '_'.join((str(self.reference_vntr.id), str(read_length))) + '.json'
    hmm_path = settings.TRAINED_HMMS_DIR + file_name

    cached_model_usable = settings.USE_TRAINED_HMMS and os.path.isfile(hmm_path)
    if not cached_model_usable:
        # The read length is also used as the flanking region size.
        matcher = self.build_vntr_matcher_hmm(copies, read_length)
        serialized = matcher.to_json()
        with open(hmm_path, 'w') as hmm_file:
            hmm_file.write(serialized)
        return matcher

    return Model().from_json(hmm_path)
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """Build an HMM that chains ``copies`` profiles of a repeat unit.

    The match/insert/delete profile of every unit is derived from
    ``patterns[0]``. A uniformly emitting state reachable from the model
    start precedes the first unit, and another one reachable from the end of
    every unit precedes the model end. After baking, when more than one
    pattern was given, the model is fitted with the 'viterbi' algorithm on
    each pattern repeated ``copies`` times.

    Returns the baked (and possibly fitted) model.
    """
    unit = patterns[0]
    unit_length = len(unit)
    model = Model(name="HMM Model")
    connect = model.add_transition
    background = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})

    leading_random = State(background, name='start_random_matches')
    trailing_random = State(background, name='end_random_matches')
    model.add_states([leading_random, trailing_random])

    previous_unit_end = None
    for copy_index in range(copies):
        # States of one repeat unit; names carry the copy index as a suffix.
        inserts = [
            State(background, name='I%s_%s' % (position, copy_index))
            for position in range(unit_length + 1)
        ]
        matches = []
        for position, base in enumerate(unit, start=1):
            # The expected base dominates the emission of its match state.
            emission = {'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01}
            emission[base] = 0.97
            matches.append(
                State(DiscreteDistribution(emission),
                      name='M%s_%s' % (position, copy_index)))
        deletes = [
            State(None, name='D%s_%s' % (position, copy_index))
            for position in range(1, unit_length + 1)
        ]
        unit_start = State(None, name='unit_start_%s' % copy_index)
        unit_end = State(None, name='unit_end_%s' % copy_index)
        model.add_states(inserts + matches + deletes + [unit_start, unit_end])
        final = len(deletes) - 1

        if copy_index == 0:
            # The first unit is entered from the model start, either directly
            # or through the leading random state.
            connect(model.start, unit_start, 0.5)
            connect(model.start, leading_random, 0.5)
            connect(leading_random, unit_start, 0.5)
            connect(leading_random, leading_random, 0.5)
        else:
            connect(previous_unit_end, unit_start, 0.5)

        connect(unit_end, trailing_random, 0.5)
        if copy_index == copies - 1:
            connect(unit_end, model.end, 0.5)
            connect(trailing_random, trailing_random, 0.5)
            connect(trailing_random, model.end, 0.5)

        # Entering the unit.
        first_insert, first_match, first_delete = inserts[0], matches[0], deletes[0]
        connect(unit_start, first_match, 0.98)
        connect(unit_start, first_delete, 0.01)
        connect(unit_start, first_insert, 0.01)
        connect(first_insert, first_insert, 0.01)
        connect(first_insert, first_delete, 0.01)
        connect(first_insert, first_match, 0.98)

        # Leaving the unit.
        closing_insert = inserts[final + 1]
        connect(deletes[final], unit_end, 0.99)
        connect(deletes[final], closing_insert, 0.01)
        connect(matches[final], unit_end, 0.99)
        connect(matches[final], closing_insert, 0.01)
        connect(closing_insert, closing_insert, 0.01)
        connect(closing_insert, unit_end, 0.99)

        # Moving from each position to the next one.
        for position in range(unit_length):
            following = position + 1
            # NOTE: at the last position these three calls repeat transitions
            # that were already added in the "leaving the unit" part above.
            connect(matches[position], inserts[following], 0.01)
            connect(deletes[position], inserts[following], 0.01)
            connect(inserts[following], inserts[following], 0.01)
            if following == unit_length:
                continue
            connect(inserts[following], matches[following], 0.98)
            connect(inserts[following], deletes[following], 0.01)
            connect(matches[position], matches[following], 0.98)
            connect(matches[position], deletes[following], 0.01)
            connect(deletes[position], deletes[following], 0.01)
            connect(deletes[position], matches[following], 0.98)

        previous_unit_end = unit_end

    model.bake()
    if len(patterns) > 1:
        training_sequences = [candidate * copies for candidate in patterns]
        model.fit(training_sequences, algorithm='viterbi',
                  transition_pseudocount=1, use_pseudocount=True)
    return model
def get_constant_number_of_repeats_matcher_hmm(patterns, copies):
    """Build an HMM that chains exactly ``copies`` repeat-unit profiles.

    Emission and transition values come from
    ``build_profile_hmm_for_repeats(patterns, settings.MAX_ERROR_RATE)``.
    The model start leads only to the first unit, every unit end leads only
    to the next unit start, and the last unit end leads to the model end.

    Returns the baked model.
    """
    model = Model(name="Repeating Pattern Matcher HMM Model")
    connect = model.add_transition

    transitions, emissions = build_profile_hmm_for_repeats(
        patterns, settings.MAX_ERROR_RATE)
    # Profile length = number of emission entries whose key starts with 'M'.
    profile_length = len([key for key in emissions.keys() if key.startswith('M')])

    previous_unit_end = None
    for copy_index in range(copies):
        # States of one repeat unit; names carry the copy index as a suffix.
        inserts = [
            State(DiscreteDistribution(emissions['I%s' % position]),
                  name='I%s_%s' % (position, copy_index))
            for position in range(profile_length + 1)
        ]
        matches = [
            State(DiscreteDistribution(emissions['M%s' % position]),
                  name='M%s_%s' % (position, copy_index))
            for position in range(1, profile_length + 1)
        ]
        deletes = [
            State(None, name='D%s_%s' % (position, copy_index))
            for position in range(1, profile_length + 1)
        ]
        unit_start = State(None, name='unit_start_%s' % copy_index)
        unit_end = State(None, name='unit_end_%s' % copy_index)
        model.add_states(inserts + matches + deletes + [unit_start, unit_end])
        tail = len(deletes) - 1

        if copy_index == 0:
            connect(model.start, unit_start, 1)
        else:
            connect(previous_unit_end, unit_start, 1)
        if copy_index == copies - 1:
            connect(unit_end, model.end, 1)

        # Entering the unit.
        connect(unit_start, matches[0], transitions['unit_start']['M1'])
        connect(unit_start, deletes[0], transitions['unit_start']['D1'])
        connect(unit_start, inserts[0], transitions['unit_start']['I0'])
        connect(inserts[0], inserts[0], transitions['I0']['I0'])
        connect(inserts[0], deletes[0], transitions['I0']['D1'])
        connect(inserts[0], matches[0], transitions['I0']['M1'])

        # Leaving the unit; transition keys are 1-based, state lists 0-based.
        tail_match_key = 'M%s' % (tail + 1)
        tail_delete_key = 'D%s' % (tail + 1)
        tail_insert_key = 'I%s' % (tail + 1)
        connect(deletes[tail], unit_end, transitions[tail_delete_key]['unit_end'])
        connect(deletes[tail], inserts[tail + 1],
                transitions[tail_delete_key][tail_insert_key])
        connect(matches[tail], unit_end, transitions[tail_match_key]['unit_end'])
        connect(matches[tail], inserts[tail + 1],
                transitions[tail_match_key][tail_insert_key])
        connect(inserts[tail + 1], inserts[tail + 1],
                transitions[tail_insert_key][tail_insert_key])
        connect(inserts[tail + 1], unit_end, transitions[tail_insert_key]['unit_end'])

        # Moving from each position to the next one.
        for position in range(1, profile_length + 1):
            match_key = 'M%s' % position
            delete_key = 'D%s' % position
            insert_key = 'I%s' % position
            current_match = matches[position - 1]
            current_delete = deletes[position - 1]
            current_insert = inserts[position]

            connect(current_match, current_insert, transitions[match_key][insert_key])
            connect(current_delete, current_insert, transitions[delete_key][insert_key])
            connect(current_insert, current_insert, transitions[insert_key][insert_key])
            if position >= profile_length:
                continue

            next_match_key = 'M%s' % (position + 1)
            next_delete_key = 'D%s' % (position + 1)
            connect(current_insert, matches[position],
                    transitions[insert_key][next_match_key])
            connect(current_insert, deletes[position],
                    transitions[insert_key][next_delete_key])
            connect(current_match, matches[position],
                    transitions[match_key][next_match_key])
            connect(current_match, deletes[position],
                    transitions[match_key][next_delete_key])
            connect(current_delete, matches[position],
                    transitions[delete_key][next_match_key])
            connect(current_delete, deletes[position],
                    transitions[delete_key][next_delete_key])

        previous_unit_end = unit_end

    model.bake(merge=None)
    return model
def get_suffix_matcher_hmm(pattern):
    """Build an HMM that matches a suffix of ``pattern``.

    A single match/insert/delete profile of ``pattern`` is created. Unlike a
    plain profile, the start of the unit is connected to every match state
    (sharing a total weight of 0.98), so a path may enter the profile at any
    position and then continues to the end of the pattern.

    Returns the baked model.
    """
    label = 'suffix'
    model = Model(name="Suffix Matcher HMM Model")
    connect = model.add_transition
    background = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})

    inserts = [
        State(background, name='I%s_%s' % (position, label))
        for position in range(len(pattern) + 1)
    ]
    matches = []
    for position, base in enumerate(pattern, start=1):
        # The expected base dominates the emission of its match state.
        emission = {'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01}
        emission[base] = 0.97
        matches.append(
            State(DiscreteDistribution(emission), name='M%s_%s' % (position, label)))
    deletes = [
        State(None, name='D%s_%s' % (position, label))
        for position in range(1, len(pattern) + 1)
    ]
    unit_start = State(None, name='suffix_start_%s' % label)
    unit_end = State(None, name='suffix_end_%s' % label)
    model.add_states(inserts + matches + deletes + [unit_start, unit_end])
    final = len(deletes) - 1

    connect(model.start, unit_start, 1)
    connect(unit_end, model.end, 1)

    # Entering the profile: any match state can be the first one visited.
    connect(unit_start, deletes[0], 0.01)
    connect(unit_start, inserts[0], 0.01)
    for entry_match in matches:
        connect(unit_start, entry_match, 0.98 / len(pattern))
    connect(inserts[0], inserts[0], 0.01)
    connect(inserts[0], deletes[0], 0.01)
    connect(inserts[0], matches[0], 0.98)

    # Leaving the profile.
    closing_insert = inserts[final + 1]
    connect(deletes[final], unit_end, 0.99)
    connect(deletes[final], closing_insert, 0.01)
    connect(matches[final], unit_end, 0.99)
    connect(matches[final], closing_insert, 0.01)
    connect(closing_insert, closing_insert, 0.01)
    connect(closing_insert, unit_end, 0.99)

    # Moving from each position to the next one.
    for position in range(len(pattern)):
        following = position + 1
        # NOTE: at the last position these three calls repeat transitions
        # that were already added in the "leaving the profile" part above.
        connect(matches[position], inserts[following], 0.01)
        connect(deletes[position], inserts[following], 0.01)
        connect(inserts[following], inserts[following], 0.01)
        if following == len(pattern):
            continue
        connect(inserts[following], matches[following], 0.98)
        connect(inserts[following], deletes[following], 0.01)
        connect(matches[position], matches[following], 0.98)
        connect(matches[position], deletes[following], 0.01)
        connect(deletes[position], deletes[following], 0.01)
        connect(deletes[position], matches[following], 0.98)

    model.bake(merge=None)
    return model
infinite model, with no extra work! This change is passed on to all the algorithms. ''' from pomegranate import * from pomegranate import HiddenMarkovModel as Model import itertools as it import numpy as np # Define the states s1 = State(NormalDistribution(5, 2), name="S1") s2 = State(NormalDistribution(15, 2), name="S2") s3 = State(NormalDistribution(25, 2), name="S3 ") # Define the transitions model = Model("infinite") model.add_transition(model.start, s1, 0.7) model.add_transition(model.start, s2, 0.2) model.add_transition(model.start, s3, 0.1) model.add_transition(s1, s1, 0.6) model.add_transition(s1, s2, 0.1) model.add_transition(s1, s3, 0.3) model.add_transition(s2, s1, 0.4) model.add_transition(s2, s2, 0.4) model.add_transition(s2, s3, 0.2) model.add_transition(s3, s1, 0.05) model.add_transition(s3, s2, 0.15) model.add_transition(s3, s3, 0.8) model.bake() sequence = [4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1]
# Contact: Jacob Schreiber
# [email protected]

"""
Example rainy-sunny HMM using pomegranate.
Example drawn from the Wikipedia HMM article:
http://en.wikipedia.org/wiki/Hidden_Markov_model
describing what Bob likes to do on rainy or sunny days.
"""

from pomegranate import *
from pomegranate import HiddenMarkovModel as Model
import random
import math

# Fixed seed so that runs of the example are repeatable.
random.seed(0)

model = Model(name="Rainy-Sunny")

# Emission probabilities: distribution over activities for each kind of day.
_rainy_activities = DiscreteDistribution({'walk': 0.1, 'shop': 0.4, 'clean': 0.5})
rainy = State(_rainy_activities, name='Rainy')

_sunny_activities = DiscreteDistribution({'walk': 0.6, 'shop': 0.3, 'clean': 0.1})
sunny = State(_sunny_activities, name='Sunny')
#!/usr/bin/env python2.7
# example.py: Yet Another Hidden Markov Model library
# Contact: Jacob Schreiber ( [email protected] )

"""
A simple example highlighting how to build a model using states, add
transitions, and then run the algorithms, including showing how training
on a sequence improves the probability of the sequence.
"""

import random
from pomegranate import *
from pomegranate import HiddenMarkovModel as Model

# Fixed seed so that runs of the example are repeatable.
random.seed(0)

model = Model(name="ExampleModel")

distribution = UniformDistribution(0.0, 1.0)
state = State(distribution, name="uniform")
state2 = State(NormalDistribution(0, 2), name="normal")
# Created here but not added to the model in this part of the script.
silent = State(None, name="silent")

for _member in (state, state2):
    model.add_state(_member)

# (source, target, probability), in the order the edges are added.
for _source, _target, _probability in (
    (state, state, 0.4),
    (state, state2, 0.4),
    (state2, state2, 0.4),
    (state2, state, 0.4),
    (model.start, state, 0.5),
    (model.start, state2, 0.5),
    (state, model.end, 0.2),
    (state2, model.end, 0.2),
):
    model.add_transition(_source, _target, _probability)