def make_main(zone, name):
    """Build the 'main' (match) state descriptor for one alignment zone.

    Emission probabilities are estimated from the zone's column, skipping
    gap characters.  The first occurrence of a symbol contributes a double
    count (a pseudocount-style smoothing); every later occurrence counts
    once.  Returns a dict with the normalized emission table, the zone,
    the emitting 'main' State and an optional silent delete State.
    """
    counts = {}
    grand_total = 0
    for symbol in zone['column'].elements:
        if symbol == '-':
            continue  # gaps do not emit
        if symbol in counts:
            counts[symbol] += 1
            grand_total += 1
        else:
            counts[symbol] = 2
            grand_total += 2
    emission = {sym: cnt / grand_total for sym, cnt in counts.items()}
    return {
        'type': 'main',
        'emission': emission,
        'zone': zone,
        'main_state': State(DiscreteDistribution(emission), name='main ' + name),
        'delete_state': State(None, name='none delete ' + name) if zone['delete'] else None,
    }
def build_an_hmm_example():
    """Assemble and bake a tiny 3-state DNA HMM, then print diagnostics.

    Each DiscreteDistribution is the emission table of one hidden state:
    the probability of observing each nucleotide while in that state.
    """
    emissions = [
        DiscreteDistribution({'A': 0.35, 'C': 0.20, 'G': 0.05, 'T': 0.40}),
        DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}),
        DiscreteDistribution({'A': 0.10, 'C': 0.40, 'G': 0.40, 'T': 0.10}),
    ]
    s1, s2, s3 = (State(d, name='s%d' % (i + 1)) for i, d in enumerate(emissions))
    model = HiddenMarkovModel('example')
    model.add_states([s1, s2, s3])
    # Transition probabilities live on the graph edges.
    for src, dst, prob in [
        (model.start, s1, 0.90),
        (model.start, s2, 0.10),
        (s1, s1, 0.80),
        (s1, s2, 0.20),
        (s2, s2, 0.90),
        (s2, s3, 0.10),
        (s3, s3, 0.70),
        (s3, model.end, 0.30),
    ]:
        model.add_transition(src, dst, prob)
    model.bake()
    for state in model.states:
        print(state.name)
    model.plot()
    #print(model.log_probability(list('ACGACTATTCGAT')))
    #print(", ".join(state.name for i, state in model.viterbi(list('ACGACTATTCGAT'))[1]))
    print("forward:", model.forward(list('ACG')))
def setup_titanic():
    """Build a toy Bayesian network of the Titanic disaster.

    Populates the module globals ``titanic_network``, ``passenger``,
    ``gender`` and ``tclass``; the network is baked before returning.
    """
    # Build a model of the titanic disaster
    global titanic_network, passenger, gender, tclass
    # Passengers on the Titanic either survive or perish
    passenger = DiscreteDistribution({'survive': 0.6, 'perish': 0.4})
    # Gender, given survival data.
    # NOTE(review): the 0/1 entries make gender deterministic given survival
    # (every survivor female, every casualty male) -- presumably a toy
    # table; confirm against the intended data.
    gender = ConditionalProbabilityTable(
        [['survive', 'male', 0.0],
         ['survive', 'female', 1.0],
         ['perish', 'male', 1.0],
         ['perish', 'female', 0.0]], [passenger])
    # Class of travel, given survival data (also deterministic rows).
    tclass = ConditionalProbabilityTable(
        [['survive', 'first', 0.0],
         ['survive', 'second', 1.0],
         ['survive', 'third', 0.0],
         ['perish', 'first', 1.0],
         ['perish', 'second', 0.0],
         ['perish', 'third', 0.0]], [passenger])
    # State objects hold both the distribution, and a high level name.
    s1 = State(passenger, name="passenger")
    s2 = State(gender, name="gender")
    s3 = State(tclass, name="class")
    # Create the Bayesian network object with a useful name
    titanic_network = BayesianNetwork("Titanic Disaster")
    # Add the three nodes to the network
    titanic_network.add_nodes(s1, s2, s3)
    # Add transitions which represent conditional dependencies, where the
    # second node is conditionally dependent on the first node
    titanic_network.add_edge(s1, s2)
    titanic_network.add_edge(s1, s3)
    titanic_network.bake()
def get_bayesnet(self):
    """Construct and return a small baked BayesianNetwork ("User_pref").

    Topology: door_lock -> light <- clock_alarm -> coffee_maker.

    Returns:
        BayesianNetwork: the finalized (baked) network.
    """
    door_lock = DiscreteDistribution({'d1': 0.7, 'd2': 0.3})
    clock_alarm = DiscreteDistribution({'a1': 0.8, 'a2': 0.2})
    # P(light | door_lock, clock_alarm).
    # NOTE(review): the d1 and d2 halves of this table are identical, so
    # light is effectively independent of door_lock -- confirm intentional.
    light = ConditionalProbabilityTable(
        [['d1', 'a1', 'l1', 0.96],
         ['d1', 'a1', 'l2', 0.04],
         ['d1', 'a2', 'l1', 0.89],
         ['d1', 'a2', 'l2', 0.11],
         ['d2', 'a1', 'l1', 0.96],
         ['d2', 'a1', 'l2', 0.04],
         ['d2', 'a2', 'l1', 0.89],
         ['d2', 'a2', 'l2', 0.11]], [door_lock, clock_alarm])
    # P(coffee_maker | clock_alarm).
    coffee_maker = ConditionalProbabilityTable(
        [['a1', 'c1', 0.92],
         ['a1', 'c2', 0.08],
         ['a2', 'c1', 0.03],
         ['a2', 'c2', 0.97]], [clock_alarm])
    s_door_lock = State(door_lock, name="door_lock")
    s_clock_alarm = State(clock_alarm, name="clock_alarm")
    s_light = State(light, name="light")
    s_coffee_maker = State(coffee_maker, name="coffee_maker")
    network = BayesianNetwork("User_pref")
    network.add_nodes(s_door_lock, s_clock_alarm, s_light, s_coffee_maker)
    network.add_edge(s_door_lock, s_light)
    network.add_edge(s_clock_alarm, s_coffee_maker)
    network.add_edge(s_clock_alarm, s_light)
    network.bake()
    return network
def get_variable_number_of_repeats_matcher_hmm(patterns, copies=1, vpaths=None):
    """Extend the constant-copies matcher HMM to accept a *variable*
    number of pattern repeats.

    Two silent states are appended to the base model and its dense
    transition matrix is rewired so that every ``unit_end`` state can
    either continue into the next unit (p=0.5) or exit to the new
    end-of-repeats state (p=0.5).  A fresh model is rebuilt from the
    edited matrix via ``Model.from_matrix``.
    """
    model = get_constant_number_of_repeats_matcher_hmm(patterns, copies, vpaths)
    start_repeats_matches = State(None, name='start_repeating_pattern_match')
    end_repeats_matches = State(None, name='end_repeating_pattern_match')
    mat = model.dense_transition_matrix()
    states = model.states
    states.append(start_repeats_matches)
    states.append(end_repeats_matches)
    states_count = len(mat)
    start_repeats_ind = states_count
    end_repeats_ind = states_count + 1
    # Grow the square matrix by two zero columns and two zero rows.
    mat = np.c_[mat, np.zeros(states_count), np.zeros(states_count)]
    mat = np.r_[mat, [np.zeros(states_count + 2)]]
    mat = np.r_[mat, [np.zeros(states_count + 2)]]
    unit_ends = []
    for i, state in enumerate(model.states):
        if state.name.startswith('unit_end'):
            unit_ends.append(i)
    # Locate the successor of the model's start state.
    # NOTE(review): if start had several successors this keeps the LAST
    # nonzero column, not the first -- assumes start is single-successor.
    first_unit_start = None
    for i in range(len(mat[model.start_index])):
        if mat[model.start_index][i] != 0:
            first_unit_start = i
    # Reroute: start -> start_repeats -> first unit.
    mat[model.start_index][first_unit_start] = 0.0
    mat[model.start_index][start_repeats_ind] = 1
    mat[start_repeats_ind][first_unit_start] = 1
    for unit_end in unit_ends:
        # Same caveat as above: picks the last nonzero successor.
        next_state = None
        for j in range(len(mat[unit_end])):
            if mat[unit_end][j] != 0:
                next_state = j
        # Split each unit_end's mass: continue repeating, or stop.
        mat[unit_end][next_state] = 0.5
        mat[unit_end][end_repeats_ind] = 0.5
    mat[end_repeats_ind][model.end_index] = 1
    starts = np.zeros(states_count + 2)
    starts[model.start_index] = 1.0
    ends = np.zeros(states_count + 2)
    ends[model.end_index] = 1.0
    state_names = [state.name for state in states]
    distributions = [state.distribution for state in states]
    name = 'Repeat Matcher HMM Model'
    new_model = Model.from_matrix(mat, distributions, starts, ends, name=name,
                                  state_names=state_names, merge=None)
    new_model.bake(merge=None)
    return new_model
def setup_monty():
    """Build the Monty Hall Bayesian network.

    Populates the module globals ``monty_network`` and the state indices
    ``monty_index``, ``prize_index``, ``guest_index``.
    """
    # Build a model of the Monty Hall Problem
    global monty_network, monty_index, prize_index, guest_index
    random.seed(0)
    # Friends emissions are completely random
    guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    # The actual prize is independent of the other distributions
    prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    # Monty is dependent on both the guest and the prize.
    # Rows are [guest, prize, monty, P(monty | guest, prize)]: Monty never
    # opens the guest's door nor the prize door.
    monty = ConditionalProbabilityTable(
        [['A', 'A', 'A', 0.0], ['A', 'A', 'B', 0.5], ['A', 'A', 'C', 0.5],
         ['A', 'B', 'A', 0.0], ['A', 'B', 'B', 0.0], ['A', 'B', 'C', 1.0],
         ['A', 'C', 'A', 0.0], ['A', 'C', 'B', 1.0], ['A', 'C', 'C', 0.0],
         ['B', 'A', 'A', 0.0], ['B', 'A', 'B', 0.0], ['B', 'A', 'C', 1.0],
         ['B', 'B', 'A', 0.5], ['B', 'B', 'B', 0.0], ['B', 'B', 'C', 0.5],
         ['B', 'C', 'A', 1.0], ['B', 'C', 'B', 0.0], ['B', 'C', 'C', 0.0],
         ['C', 'A', 'A', 0.0], ['C', 'A', 'B', 1.0], ['C', 'A', 'C', 0.0],
         ['C', 'B', 'A', 1.0], ['C', 'B', 'B', 0.0], ['C', 'B', 'C', 0.0],
         ['C', 'C', 'A', 0.5], ['C', 'C', 'B', 0.5], ['C', 'C', 'C', 0.0]],
        [guest, prize])
    # Make the states
    s1 = State(guest, name="guest")
    s2 = State(prize, name="prize")
    s3 = State(monty, name="monty")
    # Make the bayes net, add the states, and the conditional dependencies.
    monty_network = BayesianNetwork("test")
    monty_network.add_nodes(s1, s2, s3)
    monty_network.add_edge(s1, s3)
    monty_network.add_edge(s2, s3)
    monty_network.bake()
    # Cache each state's index for later queries against the baked network.
    monty_index = monty_network.states.index(s3)
    prize_index = monty_network.states.index(s2)
    guest_index = monty_network.states.index(s1)
def hmmer2pom(hmm):
    """Convert a HMMER3 profile-HMM text file into a pomegranate HMM.

    Parses the header tag lines, the alphabet, the COMPO line and the
    per-node match/insert emission and transition scores (stored in the
    file as negative natural logs), rebuilds the model as pomegranate
    States and transitions, and returns ``model.to_json()``.

    Parameters: hmm -- the full HMMER file contents as one string.
    """
    # set up environment
    from math import exp
    from pomegranate import DiscreteDistribution,HiddenMarkovModel,State
    tags = dict(); header = 0; alphabet = None; hmmlines = list()
    # parse HMMER file: header==0 -> tag section, 1 -> column-label line,
    # 2 -> body (emission/transition rows)
    for line in hmm.splitlines():
        l = line.strip()
        if len(l) == 0 or l[0] == '#':
            continue
        elif header == 0:
            if l.startswith('HMM') and l[3] != 'E': # beginning of actual HMM
                header = 1; alphabet = l.split()[1:]
            else:
                # Header tags may repeat; collect repeats into a list.
                parts = l.strip().split()
                if parts[0] in tags:
                    if not isinstance(tags[parts[0]], list):
                        tags[parts[0]] = [tags[parts[0]]]
                    tags[parts[0]].append(' '.join(parts[1:]))
                else:
                    tags[parts[0]] = ' '.join(parts[1:])
        elif header == 1:
            # Skip the transition-label line that follows the HMM header.
            header = 2
        else:
            if l.startswith('COMPO'):
                parts = l.strip().split(); tags[parts[0]] = ' '.join(parts[1:])
            else:
                hmmlines.append(l)
    # create all states; scores are -ln(p), so p = exp(-score)
    model = HiddenMarkovModel(tags['NAME']); tmpstates = list(); K = 0
    i_emit = hmmlines[0].split(); tmpstates.append(State(DiscreteDistribution({alphabet[i] : exp(-1*float(i_emit[i])) for i in range(len(alphabet))}), name="I0")) # insertion state
    # Body rows come in triplets: match emissions, insert emissions,
    # state transitions.  K tracks the current node number.
    for l in range(2,len(hmmlines),3):
        m_emit,i_emit,state_trans = [hmmlines[l+i].split() for i in range(0,3)]; K = int(m_emit[0])
        tmpstates.append(State(DiscreteDistribution({alphabet[i] : exp(-1*float(m_emit[i+1])) for i in range(len(alphabet))}), name="M%d" % K)) # match state
        tmpstates.append(State(DiscreteDistribution({alphabet[i] : exp(-1*float(i_emit[i])) for i in range(len(alphabet))}), name="I%d" % K)) # insertion state
        tmpstates.append(State(None, name="D%d" % K)) # deletion state
    assert K != 0, "No match states in profile HMM"
    # M0 aliases the model start and M(K+1) the model end.
    model.add_states(tmpstates); name2state = {state.name:state for state in tmpstates}; name2state["M0"] = model.start; name2state["M%d"%(K+1)] = model.end
    # create all transitions; '*' in the file means probability 0 (omit)
    for l in range(1,len(hmmlines),3):
        k = int(l/3); parts = hmmlines[l].split()
        model.add_transition(name2state["M%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[0]))) # 0: M_k -> M_k+1
        model.add_transition(name2state["M%d"%k], name2state["I%d"%k], exp(-1*float(parts[1]))) # 1: M_k -> I_k
        if parts[2] != '*': # no D_k+1 in last row
            model.add_transition(name2state["M%d"%k], name2state["D%d"%(k+1)], exp(-1*float(parts[2]))) # 2: M_k -> D_k+1
        model.add_transition(name2state["I%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[3]))) # 3: I_k -> M_k+1
        model.add_transition(name2state["I%d"%k], name2state["I%d"%k], exp(-1*float(parts[4]))) # 4: I_k -> I_k
        if k != 0: # no D0 state
            model.add_transition(name2state["D%d"%k], name2state["M%d"%(k+1)], exp(-1*float(parts[5]))) # 5: D_k -> M_k+1
            if parts[6] != '*': # no D0 state and no D_k+1 in last row
                model.add_transition(name2state["D%d"%k], name2state["D%d"%(k+1)], exp(-1*float(parts[6]))) # 6: D_k -> D_k+1
    model.bake()
    return model.to_json()
def with_variations(dist, name):
    """Return the (match, insert, delete) state triple for one profile column.

    The insert state emits the four nucleotides uniformly; the delete
    state is silent (no distribution).
    """
    uniform_acgt = DiscreteDistribution({'a': 0.25, 'c': 0.25, 'g': 0.25, 't': 0.25})
    match_state = State(dist, name=name)
    insert_state = State(uniform_acgt, name='i_' + name)
    delete_state = State(None, name='d_' + name)
    return match_state, insert_state, delete_state
def update_hmm(self):
    """Rebuild self.hmm from the current emission/transition/start tables.

    One discrete emission distribution per state (keyed by symbol index),
    fully-connected transitions from self.transitions, and start
    probabilities from self.start_prob; the model is baked before being
    stored on self.hmm.
    """
    n_states = self.num_states
    n_emissions = self.num_emissions
    distributions = [
        DiscreteDistribution(dict(zip(range(n_emissions), self.emissions[i])))
        for i in range(n_states)
    ]
    states = [
        State(distributions[i], 's' + str(i).zfill(2)) for i in range(n_states)
    ]
    hmm = HiddenMarkovModel('hmm')
    hmm.add_states(states)
    for i, src in enumerate(states):
        hmm.add_transition(hmm.start, src, self.start_prob[i])
        for j, dst in enumerate(states):
            hmm.add_transition(src, dst, self.transitions[i, j])
    self.hmm = hmm
    self.hmm.bake()
def make_insert(zone, name):
    """Build the 'insert' state descriptor for one alignment zone.

    Emission counts are pooled over every column of the zone, skipping
    gaps.  The first occurrence of a symbol contributes a double count
    (pseudocount-style smoothing); later occurrences count once.
    """
    counts = {}
    seen_total = 0
    for column in zone['columns']:
        for symbol in column.elements:
            if symbol == '-':
                continue  # gaps do not emit
            if symbol in counts:
                counts[symbol] += 1
                seen_total += 1
            else:
                counts[symbol] = 2
                seen_total += 2
    emission = {sym: cnt / seen_total for sym, cnt in counts.items()}
    return {
        'type': 'insert',
        'emission': emission,
        'zone': zone,
        'insert_state': State(DiscreteDistribution(emission), name='insert ' + name),
    }
def insert_delete_main_hmm(data_matrix):
    """Assemble a profile HMM (match/insert/delete) from an alignment matrix.

    Classifies the matrix columns into zones, groups them into state
    descriptors, then wires silent 'ali_start'/'ali_end' boundary states
    plus the computed transitions before baking.
    """
    zones = create_zones(column_clasify(data_matrix))
    grouped = group_states(zones, 'test')
    model = HiddenMarkovModel()
    ali_start = State(None, name='ali_start')
    ali_end = State(None, name='ali_end')
    model.add_state(ali_start)
    model.add_transition(model.start, ali_start, 1)
    model.add_state(ali_end)
    add_states(model, grouped)
    transitions = calculate_transitions(ali_start, ali_end, grouped)
    apply_transitions(model, transitions)
    model.bake()
    return model
def oriHMMParams(self):
    """
    Build and return the initial Hidden Markov Model.

    Five hidden states (0--start, 1--downstream, 2--no bias,
    3--upstream, 4--end), each emitting from a 3-component Gaussian
    mixture whose component means are spread over a state-specific
    range; a fixed transition matrix and start weights are wired in
    before ``bake()``.

    Returns
    -------
    HiddenMarkovModel
        The baked pomegranate model with GMM emissions.
    """
    hmm = HiddenMarkovModel()
    # GMM emissions
    # 5 Hidden States:
    # 0--start, 1--downstream, 2--no bias, 3--upstream, 4--end
    numdists = 3  # Three-distribution Gaussian Mixtures
    var = 7.5 / (numdists - 1)
    means = [[], [], [], [], []]
    # Component means: state 2 is symmetric around 0, states 3/4 are
    # shifted positive, states 1/0 are the mirrored negative versions.
    for i in range(numdists):
        means[4].append(i * 7.5 / (numdists - 1) + 2.5)
        means[3].append(i * 7.5 / (numdists - 1))
        means[2].append((i - (numdists - 1) / 2) * 7.5 / (numdists - 1))
        means[1].append(-i * 7.5 / (numdists - 1))
        means[0].append(-i * 7.5 / (numdists - 1) - 2.5)
    states = []
    for i, m in enumerate(means):
        tmp = []
        for j in m:
            tmp.append(NormalDistribution(j, var))
        mixture = GeneralMixtureModel(tmp)
        states.append(State(mixture, name=str(i)))
    hmm.add_states(*tuple(states))
    # Transmission matrix (reference layout of the edges added below)
    #A = [[0., 1., 0., 0., 0.],
    #     [0., 0.4, 0.3, 0.3, 0.],
    #     [0.05, 0., 0.5, 0.45, 0.],
    #     [0., 0., 0., 0.5, 0.5],
    #     [0.99, 0., 0.01, 0., 0.]]
    hmm.add_transition(states[0], states[1], 1)
    hmm.add_transition(states[1], states[1], 0.4)
    hmm.add_transition(states[1], states[2], 0.3)
    hmm.add_transition(states[1], states[3], 0.3)
    hmm.add_transition(states[2], states[0], 0.05)
    hmm.add_transition(states[2], states[2], 0.5)
    hmm.add_transition(states[2], states[3], 0.45)
    hmm.add_transition(states[3], states[3], 0.5)
    hmm.add_transition(states[3], states[4], 0.5)
    hmm.add_transition(states[4], states[0], 0.99)
    hmm.add_transition(states[4], states[2], 0.01)
    # Hidden-state start weights ("pi").
    pi = [0.05, 0.3, 0.3, 0.3, 0.05]
    for i in range(len(states)):
        hmm.add_transition(hmm.start, states[i], pi[i])
    hmm.bake()
    return hmm
def sequence_state_factory(states_data, name):
    """Create one discrete-emission State per entry of *states_data*.

    State i is named ``name + str(i)`` and emits according to
    ``states_data[i].states_distribution``.
    """
    return [
        State(DiscreteDistribution(entry.states_distribution), name=name + str(idx))
        for idx, entry in enumerate(states_data)
    ]
def load_segmentation_model(modeldata):
    """Rebuild a segmentation HMM from its serialized description.

    Each entry of *modeldata* holds a 'name', an 'emission' list of
    (mu, sigma, weight) rows -- a single row becomes a plain
    NormalDistribution, several rows a GeneralMixtureModel -- plus an
    optional 'start_prob' and a 'transition' list of (target, prob).
    """
    model = HiddenMarkovModel('model')
    states = {}
    # First pass: create every state and its start transition.
    for spec in modeldata:
        rows = spec['emission']
        if len(rows) == 1:
            emission = NormalDistribution(*rows[0][:2])
        else:
            mixture_weights = np.array([row[2] for row in rows])
            components = [NormalDistribution(row[0], row[1]) for row in rows]
            emission = GeneralMixtureModel(components, weights=mixture_weights)
        st = State(emission, name=spec['name'])
        states[spec['name']] = st
        model.add_state(st)
        if 'start_prob' in spec:
            model.add_transition(model.start, st, spec['start_prob'])
    # Second pass: inter-state transitions (all states now exist).
    for spec in modeldata:
        src = states[spec['name']]
        for target_name, prob in spec['transition']:
            model.add_transition(src, states[target_name], prob)
    model.bake()
    return model
def bake_model(tags_sequence, words_sequence): """ 'tags' are the time-demand labels that generate the emitted demand level. Demand level are represented by 'words' """ # rdemand words = [x for x in chain(*words_sequence)] tag_unigrams = unigram_counts(words) tag_bigrams = bigram_counts(words) # Uniform distribution for starting and ending labels all_labels = list(set(words)) tag_starts = starting_counts(all_labels) tag_ends = ending_counts(all_labels) basic_model = HiddenMarkovModel(name="base-hmm-tagger") # Emission count label_train = tags_sequence rdemand_train = words_sequence emission_count = pair_counts(rdemand_train, label_train) # States with emission probability distributions P(word | tag) states = [] for rdemand, label_dict in emission_count.items(): dist_tag = DiscreteDistribution({ label: cn / tag_unigrams[rdemand] for label, cn in label_dict.items() }) states.append(State(dist_tag, name=rdemand)) basic_model.add_states(states) state_names = [s.name for s in states] state_index = {tag: num for num, tag in enumerate(state_names)} # Start transition total_start = sum(tag_starts.values()) for tag, cn in tag_starts.items(): # sname = state_index[tag] basic_model.add_transition(basic_model.start, states[state_index[tag]], cn / total_start) # End transition total_end = sum(tag_ends.values()) for tag, cn in tag_ends.items(): basic_model.add_transition(states[state_index[tag]], basic_model.end, cn / total_end) # Edges between states for the observed transition frequencies P(tag_i | tag_i-1) for key, value in tag_bigrams.items(): basic_model.add_transition(states[state_index[key[0]]], states[state_index[key[1]]], value / tag_unigrams[key[0]]) # Finalize the model basic_model.bake() return basic_model
def state_sequence_from(emissions, name):
    """Build a chain of discrete-emission states named ``<name>_<i>``.

    Returns the state list together with a list of 1s, one per adjacent
    state pair (used downstream as uniform transition weights).
    """
    states = []
    for idx, emission in enumerate(emissions):
        state_name = name + '_' + str(idx)
        print('creado estado', state_name)
        states.append(State(DiscreteDistribution(emission), name=state_name))
    return states, [1] * (len(states) - 1)
def init_model(start_dip, stay_state, mean_eu, sd_eu, mean_loh):
    """Three-state Gaussian HMM (EU / LOH / ANEU) for copy-state calling.

    Parameters
    ----------
    start_dip : probability of starting in the euploid state
    stay_state : self-transition probability shared by all states
    mean_eu, sd_eu : Gaussian parameters of the euploid emission
    mean_loh : mean of the LOH emission (shares sd_eu); ANEU uses
        mean_loh / 2 with an inflated sd

    Only transitions are trainable afterwards; emissions are frozen.
    """
    ## define distributions
    d_eu = NormalDistribution(mean_eu, sd_eu)  ## euploid enriched at 0
    d_loh = NormalDistribution(mean_loh, sd_eu)  ## loss of heterozygosity enriched at 1
    d_aneu = NormalDistribution(mean_loh / 2.0, sd_eu * 1.4)  ## aneuploid enriched at 1
    ## define states
    s_eu = State(d_eu, name='EU')  ## enriched at 0
    s_loh = State(d_loh, name='LOH')  ## enriched at 1
    s_aneu = State(d_aneu, name='ANEU')  ## enriched at 1
    ## define model and pass in states
    model = HiddenMarkovModel()
    model.add_states(s_eu, s_loh, s_aneu)
    ## define transition matrix (state a, state b, probability)
    # NOTE(review): each state's outgoing mass below sums to ~2, not 1
    # (e.g. the s_eu row: stay + (1-4s/5-.001) + (1-s/5-.001) + .002 = 2).
    # Verify whether bake()-time renormalization is being relied upon.
    model.add_transition(model.start, s_eu, start_dip)
    model.add_transition(model.start, s_loh, 1.0 - start_dip - 0.1)
    model.add_transition(model.start, s_aneu, 0.1)
    model.add_transition(s_eu, s_eu, stay_state)
    model.add_transition(s_eu, s_loh, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(s_eu, s_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(s_eu, model.end, 0.002)
    model.add_transition(s_loh, s_loh, stay_state)
    model.add_transition(s_loh, s_eu, 1.0 - 4 * stay_state / 5 - 0.001)
    model.add_transition(s_loh, s_aneu, 1.0 - stay_state / 5 - 0.001)
    model.add_transition(s_loh, model.end, 0.002)
    model.add_transition(s_aneu, s_aneu, stay_state)
    model.add_transition(s_aneu, s_eu, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(s_aneu, s_loh, 1.0 - stay_state / 2 - 0.001)
    model.add_transition(s_aneu, model.end, 0.002)
    ## finalize internal structure
    model.bake()
    ## only train transitions, not emissions
    model.freeze_distributions()
    return model
def build_the_same_model_in_test_sample_from_site_line_by_line():
    """Recreate the documentation's 3-state Gaussian HMM and plot it.

    States hold only the emission distributions; the transition
    probabilities live on the graph edges added below.
    """
    s1 = State(NormalDistribution(5, 1))
    s2 = State(NormalDistribution(1, 7))
    s3 = State(NormalDistribution(8, 2))
    model = HiddenMarkovModel()
    model.add_states(s1, s2, s3)
    edges = [
        (model.start, s1, 1.0),
        (s1, s1, 0.7),
        (s1, s2, 0.3),
        (s2, s2, 0.8),
        (s2, s3, 0.2),
        (s3, s3, 0.9),
        (s3, model.end, 0.1),
    ]
    for src, dst, prob in edges:
        model.add_transition(src, dst, prob)
    model.bake()
    model.plot()
def train_hmm_tagger(data):
    """Build, bake and sanity-check an HMM part-of-speech tagger.

    Relies on module-level counts computed earlier: ``tag_unigrams``,
    ``tag_starts``, ``tag_ends`` and ``tag_bigrams``.
    """
    # HMM
    # Use the tag unigrams and bigrams calculated above to construct a hidden Markov tagger.
    #
    # - Add one state per tag
    # - The emission distribution at each state should be estimated with the formula: $P(w|t) = \frac{C(t, w)}{C(t)}$
    # - Add an edge from the starting state `basic_model.start` to each tag
    # - The transition probability should be estimated with the formula: $P(t|start) = \frac{C(start, t)}{C(start)}$
    # - Add an edge from each tag to the end state `basic_model.end`
    # - The transition probability should be estimated with the formula: $P(end|t) = \frac{C(t, end)}{C(t)}$
    # - Add an edge between _every_ pair of tags
    # - The transition probability should be estimated with the formula: $P(t_2|t_1) = \frac{C(t_1, t_2)}{C(t_1)}$
    basic_model = HiddenMarkovModel(name="base-hmm-tagger")
    state_dict = {}
    states = []  # (unused)
    # stream() yields (word, tag) pairs; [::-1] flips the zipped columns
    # to (tags, words) for pair_counts.
    emission_counts = pair_counts(*list(zip(*data.training_set.stream()))[::-1])
    # One state per tag with emission P(w|t) = C(t, w) / C(t).
    for tag in emission_counts.keys():
        tag_count = tag_unigrams[tag]
        probs = {}
        for w in emission_counts[tag]:
            probs[w] = emission_counts[tag][w] / tag_count
        emission_p = DiscreteDistribution(probs)
        state = State(emission_p, name="" + tag)
        basic_model.add_state(state)
        state_dict[tag] = state
    # Start and end transitions for every tag.
    for tag in tag_starts:
        basic_model.add_transition(basic_model.start, state_dict[tag], tag_starts[tag] / len(data.training_set.Y))
        basic_model.add_transition(state_dict[tag], basic_model.end, tag_ends[tag] / tag_unigrams[tag])
    # Tag-to-tag transitions P(t2|t1) = C(t1, t2) / C(t1).
    for (tag1, tag2) in tag_bigrams:
        basic_model.add_transition(state_dict[tag1], state_dict[tag2], tag_bigrams[(tag1, tag2)] / tag_unigrams[tag1])
    # finalize the model
    basic_model.bake()
    assert all(
        tag in set(s.name for s in basic_model.states)
        for tag in data.training_set.tagset
    ), "Every state in your network should use the name of the associated tag, which must be one of the training set tags."
    assert basic_model.edge_count() == 168, (
        "Your network should have an edge from the start node to each state, one edge between every " +
        "pair of tags (states), and an edge from each state to the end node.")
    HTML(
        '<div class="alert alert-block alert-success">Your HMM network topology looks good!</div>'
    )
    return basic_model
def buildHmm(minAmpliconLength, maxGap, windowSize):
    """Two-state (background/amplicon) binary-emission HMM for amplicon calls.

    The state-switch probability is derived from the minimum amplicon
    length, and the amplicon emission rates from the maximum tolerated
    gap; both states share the same switch probability.
    """
    b_bkgd_1 = 0.1
    a_interstate = b_bkgd_1 ** (2 * minAmpliconLength / windowSize)
    b_amp_0 = (a_interstate) ** (0.5 * windowSize / maxGap)
    b_amp_1 = 1 - b_amp_0
    b_bkgd_0 = 1 - b_bkgd_1
    s_bkgd = State(DiscreteDistribution({0: b_bkgd_0, 1: b_bkgd_1}), name='background')
    s_amp = State(DiscreteDistribution({0: b_amp_0, 1: b_amp_1}), name='amplicon')
    hmm = HiddenMarkovModel()
    hmm.add_states(s_bkgd, s_amp)
    stay = 1 - a_interstate
    for src, dst, prob in [
        (hmm.start, s_bkgd, stay),
        (hmm.start, s_amp, a_interstate),
        (s_bkgd, s_bkgd, stay),
        (s_bkgd, s_amp, a_interstate),
        (s_amp, s_bkgd, a_interstate),
        (s_amp, s_amp, stay),
    ]:
        hmm.add_transition(src, dst, prob)
    hmm.bake()
    return hmm
def build_net(cpts):
    """Wire the poker-game Bayesian network from named CPTs.

    Edges come from the module-level ``sheets`` table, whose rows are
    (name, parents, _) triples.
    """
    states = {name: State(cpt, name=name) for name, cpt in cpts.items()}
    model = BayesianNetwork('Poker Game')
    model.add_states(*list(states.values()))
    for name, parents, _ in sheets:
        for parent in parents:
            print(states[parent])
            model.add_transition(states[parent], states[name])
    model.bake()
    return model
def create_hidden_MarkovModel(e_df, q_df, start_p_dict):
    """Create a baked Hidden Markov Model from pandas DataFrames.

    Args:
        e_df (pd.DataFrame): emission probabilities, one column per state;
            each column maps observation -> P(observation | state).
        q_df (pd.DataFrame): transition probabilities between states;
            q_df.loc[a, b] = P(b | a).
        start_p_dict (dict): initial probability for each state.

    Returns:
        HiddenMarkovModel: the finalized (baked) model.
    """
    model = HiddenMarkovModel(name="Example Model")
    # 1: one State per column of the transition frame.
    model_dict = {}
    for key in q_df.keys().values:
        # Emission distribution P(observation | state) for this state.
        # (The original code built this State twice and discarded one.)
        emission_p = DiscreteDistribution(e_df[key].to_dict())
        model_dict[key] = State(emission_p, name=key)
        model.add_state(model_dict[key])
        # Start probability for this state.
        model.add_transition(model.start, model_dict[key], start_p_dict[key])
    # 2: transition probability between every ordered pair of states.
    for key, item in q_df.to_dict("index").items():
        for item_name in item:
            model.add_transition(model_dict[key], model_dict[item_name],
                                 q_df.loc[key, item_name])
    # Finalize the model.
    model.bake()
    return model
def ghmm_model(states_labels: tuple, transitions: tuple, init_prob: tuple,
               end_prob: tuple, means: list, vars: list) -> HiddenMarkovModel:
    """Build a GMM-emission HMM from explicit parameter tables.

    :param states_labels: one label per hidden state
    :param transitions: square matrix of state-to-state probabilities
    :param init_prob: start-state probabilities (zeros are skipped)
    :param end_prob: end-state probabilities (zeros are skipped)
    :param means: per-state list of mixture-component means
    :param vars: per-state list of mixture-component variances
    :return: the baked HiddenMarkovModel
    """
    hmm_model = HiddenMarkovModel()
    n_components = len(vars[0])
    states = []
    for idx, _label in enumerate(states_labels):
        components = [
            NormalDistribution(means[idx][c], vars[idx][c])
            for c in range(n_components)
        ]
        states.append(State(GeneralMixtureModel(components), name=str(idx)))
    hmm_model.add_states(*tuple(states))
    n_states = len(states_labels)
    for row in range(n_states):
        for col in range(n_states):
            prob = transitions[row][col]
            if prob != 0.:
                hmm_model.add_transition(states[row], states[col], prob)
    for idx, prob in enumerate(init_prob):
        if prob != 0.:
            hmm_model.add_transition(hmm_model.start, states[idx], prob)
    for idx, prob in enumerate(end_prob):
        if prob != 0.:
            hmm_model.add_transition(states[idx], hmm_model.end, prob)
    hmm_model.bake()
    return hmm_model
def emission_state_list(training_word_prob_path, training_all_word_path):
    """Build one pomegranate State (discrete P(word | tag)) per POS tag.

    Loads (generating on first use) the word-by-tag count CSV, normalizes
    each tag's column into a probability distribution, and returns a dict
    mapping tag -> State wrapping that DiscreteDistribution.
    """
    # Generate the counts file if it does not exist yet.
    if not os.path.exists(training_word_prob_path):
        word_by_tag(training_all_word_path, training_word_prob_path)
    df = pd.read_csv(training_word_prob_path)
    df.drop(columns=['Unnamed: 0', 'SUM'], inplace=True)
    distinct_types = [col for col in df.columns if col not in ['Word', 'SUM']]
    unigram_word_hash = {}
    """ Split df based on distinct tags """
    for d in distinct_types:
        sample_df = df[df[d] > 0]
        unigram_word_hash[d] = sample_df[['Word', d]]
    """ P W|T V word in tag """
    # NOTE(review): assigning into these frame slices may trigger pandas'
    # SettingWithCopyWarning; consider .copy() on the subsets above.
    for k in unigram_word_hash:
        subset = unigram_word_hash[k]
        subset[k] = subset[k] / subset[k].sum()
        unigram_word_hash[k] = subset
    """ Assert probability distribution == 1 """
    # (Printed for eyeballing only; nothing actually asserts here.)
    for k in unigram_word_hash:
        subset = unigram_word_hash[k]
        print(f'{k} distribution sum => {subset[k].sum()}')
    """ Create discrete distribution objects """
    for k in unigram_word_hash:
        dist = unigram_word_hash[k]
        print(k)
        # Transpose so row 0 maps word -> probability for this tag.
        dist_dict = dist.set_index("Word").T.to_dict("Records")[0]
        if k == 'NOUN':
            print(dist_dict['time'])  # debug spot-check
        discrete_dist = DiscreteDistribution(dist_dict)
        unigram_word_hash[k] = State(discrete_dist, name=k)
    return unigram_word_hash
# Calculate the count of each tag starting a sequence tag_starts = starting_counts(data.training_set.Y) # Calculate the count of each tag ending a sequence tag_ends = ending_counts(data.training_set.Y) basic_model = HiddenMarkovModel(name="base-hmm-tagger") # Create states with emission probability distributions P(word | tag) and add to the model tag_states = {} for tag in data.training_set.tagset: tag_emissions = DiscreteDistribution({ word: emission_counts[tag][word] / tag_unigrams[tag] for word in emission_counts[tag] }) tag_states[tag] = State(tag_emissions, name=tag) basic_model.add_state(tag_states[tag]) # Add edges between states for the observed transition frequencies P(tag_i | tag_i-1) for tag in data.training_set.tagset: basic_model.add_transition(basic_model.start, tag_states[tag], tag_starts[tag] / tag_unigrams[tag]) for tag1 in data.training_set.tagset: basic_model.add_transition( tag_states[tag], tag_states[tag1], tag_bigrams[(tag, tag1)] / tag_unigrams[tag]) basic_model.add_transition(tag_states[tag], basic_model.end, tag_ends[tag] / tag_unigrams[tag]) # finalize the model basic_model.bake()
def build_reference_repeat_finder_hmm(patterns, copies=1):
    """Profile HMM that finds `copies` tandem repeats of patterns[0].

    Each repeat unit is a match/insert/delete column chain (M_i emits
    pattern[i] with p=0.97) bracketed by silent unit_start/unit_end
    states; uniformly-emitting states before and after the repeat block
    absorb random flanking sequence.  If several patterns are given the
    model is refined by Viterbi training on `pattern * copies` strings.
    """
    pattern = patterns[0]
    model = Model(name="HMM Model")
    # Background/insert emissions: uniform over the four nucleotides.
    insert_distribution = DiscreteDistribution({
        'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25
    })
    start_random_matches = State(insert_distribution, name='start_random_matches')
    end_random_matches = State(insert_distribution, name='end_random_matches')
    model.add_states([start_random_matches, end_random_matches])
    last_end = None
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # I0..I|P|: one (emitting) insert state per inter-column gap.
        for i in range(len(pattern) + 1):
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))
        # M1..M|P|: heavily biased toward the pattern base (0.97 vs 0.01).
        for i in range(len(pattern)):
            distribution_map = dict({
                'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01
            })
            distribution_map[pattern[i]] = 0.97
            match_states.append(
                State(DiscreteDistribution(distribution_map),
                      name='M%s_%s' % (str(i + 1), repeat)))
        # D1..D|P|: silent deletion states.
        for i in range(len(pattern)):
            delete_states.append(
                State(None, name='D%s_%s' % (str(i + 1), repeat)))
        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        last = len(delete_states) - 1
        # Chain units together; the first unit hangs off model.start and
        # may be preceded by random flanking matches.
        if repeat > 0:
            model.add_transition(last_end, unit_start, 0.5)
        else:
            model.add_transition(model.start, unit_start, 0.5)
            model.add_transition(model.start, start_random_matches, 0.5)
            model.add_transition(start_random_matches, unit_start, 0.5)
            model.add_transition(start_random_matches, start_random_matches, 0.5)
        model.add_transition(unit_end, end_random_matches, 0.5)
        # The final unit may also exit the model (directly or via the
        # trailing random-match state).
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 0.5)
            model.add_transition(end_random_matches, end_random_matches, 0.5)
            model.add_transition(end_random_matches, model.end, 0.5)
        # Entry transitions out of unit_start.
        model.add_transition(unit_start, match_states[0], 0.98)
        model.add_transition(unit_start, delete_states[0], 0.01)
        model.add_transition(unit_start, insert_states[0], 0.01)
        model.add_transition(insert_states[0], insert_states[0], 0.01)
        model.add_transition(insert_states[0], delete_states[0], 0.01)
        model.add_transition(insert_states[0], match_states[0], 0.98)
        # Exit transitions from the last column into unit_end.
        model.add_transition(delete_states[last], unit_end, 0.99)
        model.add_transition(delete_states[last], insert_states[last + 1], 0.01)
        model.add_transition(match_states[last], unit_end, 0.99)
        model.add_transition(match_states[last], insert_states[last + 1], 0.01)
        model.add_transition(insert_states[last + 1], insert_states[last + 1], 0.01)
        model.add_transition(insert_states[last + 1], unit_end, 0.99)
        # Column-to-column transitions through the unit; the i+1 column
        # states only exist while i < len(pattern) - 1.
        for i in range(0, len(pattern)):
            model.add_transition(match_states[i], insert_states[i + 1], 0.01)
            model.add_transition(delete_states[i], insert_states[i + 1], 0.01)
            model.add_transition(insert_states[i + 1], insert_states[i + 1], 0.01)
            if i < len(pattern) - 1:
                model.add_transition(insert_states[i + 1], match_states[i + 1], 0.98)
                model.add_transition(insert_states[i + 1], delete_states[i + 1], 0.01)
                model.add_transition(match_states[i], match_states[i + 1], 0.98)
                model.add_transition(match_states[i], delete_states[i + 1], 0.01)
                model.add_transition(delete_states[i], delete_states[i + 1], 0.01)
                model.add_transition(delete_states[i], match_states[i + 1], 0.98)
        last_end = unit_end
    model.bake()
    if len(patterns) > 1:
        # model.fit(patterns, algorithm='baum-welch', transition_pseudocount=1, use_pseudocount=True)
        fit_patterns = [pattern * copies for pattern in patterns]
        model.fit(fit_patterns, algorithm='viterbi', transition_pseudocount=1,
                  use_pseudocount=True)
    return model
def get_constant_number_of_repeats_matcher_hmm(patterns, copies):
    """Build an HMM matching exactly `copies` repeats of a trained profile.

    Unlike build_reference_repeat_finder_hmm, every emission and
    transition probability comes from build_profile_hmm_for_repeats()
    rather than from fixed constants.  The trained profile is replicated
    `copies` times and the units are chained with probability-1
    transitions, so the model matches a constant number of repeats and
    has no flanking random-sequence states.

    Args:
        patterns: repeat observations handed to the profile builder.
        copies: number of repeat units to chain together.

    Returns:
        A pomegranate `Model`, baked with merge=None so the silent
        unit_start/unit_end states are kept.
    """
    model = Model(name="Repeating Pattern Matcher HMM Model")
    transitions, emissions = build_profile_hmm_for_repeats(
        patterns, settings.MAX_ERROR_RATE)
    # Profile length = number of match columns ('M1', 'M2', ...) in the
    # trained emission table.
    matches = [m for m in emissions.keys() if m.startswith('M')]
    last_end = None
    for repeat in range(copies):
        insert_states = []
        match_states = []
        delete_states = []
        # I0..In: insert states with trained emissions.
        for i in range(len(matches) + 1):
            insert_distribution = DiscreteDistribution(emissions['I%s' % i])
            insert_states.append(
                State(insert_distribution, name='I%s_%s' % (i, repeat)))
        # M1..Mn: match states with trained emissions (1-based indices).
        for i in range(1, len(matches) + 1):
            match_distribution = DiscreteDistribution(emissions['M%s' % i])
            match_states.append(
                State(match_distribution, name='M%s_%s' % (str(i), repeat)))
        # D1..Dn: silent delete states.
        for i in range(1, len(matches) + 1):
            delete_states.append(State(None, name='D%s_%s' % (str(i), repeat)))
        unit_start = State(None, name='unit_start_%s' % repeat)
        unit_end = State(None, name='unit_end_%s' % repeat)
        model.add_states(insert_states + match_states + delete_states +
                         [unit_start, unit_end])
        n = len(delete_states) - 1
        if repeat > 0:
            # Deterministic chaining: exactly `copies` repeats, no more.
            model.add_transition(last_end, unit_start, 1)
        else:
            model.add_transition(model.start, unit_start, 1)
        if repeat == copies - 1:
            model.add_transition(unit_end, model.end, 1)
        # Unit entry transitions (probabilities from the trained profile).
        model.add_transition(unit_start, match_states[0],
                             transitions['unit_start']['M1'])
        model.add_transition(unit_start, delete_states[0],
                             transitions['unit_start']['D1'])
        model.add_transition(unit_start, insert_states[0],
                             transitions['unit_start']['I0'])
        model.add_transition(insert_states[0], insert_states[0],
                             transitions['I0']['I0'])
        model.add_transition(insert_states[0], delete_states[0],
                             transitions['I0']['D1'])
        model.add_transition(insert_states[0], match_states[0],
                             transitions['I0']['M1'])
        # Unit exit transitions from the final profile column (index n+1,
        # 1-based in the transition table).
        model.add_transition(delete_states[n], unit_end,
                             transitions['D%s' % (n + 1)]['unit_end'])
        model.add_transition(delete_states[n], insert_states[n + 1],
                             transitions['D%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(match_states[n], unit_end,
                             transitions['M%s' % (n + 1)]['unit_end'])
        model.add_transition(match_states[n], insert_states[n + 1],
                             transitions['M%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], insert_states[n + 1],
                             transitions['I%s' % (n + 1)]['I%s' % (n + 1)])
        model.add_transition(insert_states[n + 1], unit_end,
                             transitions['I%s' % (n + 1)]['unit_end'])
        # Column-to-column transitions; i is the 1-based profile column,
        # list indices are i-1 for the current column and i for the next.
        for i in range(1, len(matches) + 1):
            model.add_transition(match_states[i - 1], insert_states[i],
                                 transitions['M%s' % i]['I%s' % i])
            model.add_transition(delete_states[i - 1], insert_states[i],
                                 transitions['D%s' % i]['I%s' % i])
            model.add_transition(insert_states[i], insert_states[i],
                                 transitions['I%s' % i]['I%s' % i])
            if i < len(matches):
                model.add_transition(insert_states[i], match_states[i],
                                     transitions['I%s' % i]['M%s' % (i + 1)])
                model.add_transition(insert_states[i], delete_states[i],
                                     transitions['I%s' % i]['D%s' % (i + 1)])
                model.add_transition(match_states[i - 1], match_states[i],
                                     transitions['M%s' % i]['M%s' % (i + 1)])
                model.add_transition(match_states[i - 1], delete_states[i],
                                     transitions['M%s' % i]['D%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], match_states[i],
                                     transitions['D%s' % i]['M%s' % (i + 1)])
                model.add_transition(delete_states[i - 1], delete_states[i],
                                     transitions['D%s' % i]['D%s' % (i + 1)])
        last_end = unit_end
    model.bake(merge=None)
    return model
def get_suffix_matcher_hmm(pattern):
    """Build a profile HMM that matches any suffix of `pattern`.

    Structurally a single profile-HMM unit (match/insert/delete columns),
    except that the silent entry state can jump into ANY match column with
    equal probability, which is what allows matching from an arbitrary
    starting position — i.e. a suffix of the pattern.

    Args:
        pattern: the nucleotide string whose suffixes should be matched.

    Returns:
        A pomegranate `Model`, baked with merge=None so silent states
        are preserved.
    """
    model = Model(name="Suffix Matcher HMM Model")
    # Insert emissions are uniform over the alphabet.
    uniform_insert = DiscreteDistribution({
        'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25
    })

    tag = 'suffix'
    n = len(pattern)

    # I0..In — one insert state per gap position, sharing the uniform table.
    inserts = [State(uniform_insert, name='I%s_%s' % (pos, tag))
               for pos in range(n + 1)]

    # M1..Mn — each match state emits its pattern base with prob. 0.97.
    matches = []
    for pos, base in enumerate(pattern):
        emission = {'A': 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.01}
        emission[base] = 0.97
        matches.append(State(DiscreteDistribution(emission),
                             name='M%s_%s' % (str(pos + 1), tag)))

    # D1..Dn — silent states that skip a pattern position.
    deletes = [State(None, name='D%s_%s' % (str(pos + 1), tag))
               for pos in range(n)]

    unit_start = State(None, name='suffix_start_%s' % tag)
    unit_end = State(None, name='suffix_end_%s' % tag)
    model.add_states(inserts + matches + deletes + [unit_start, unit_end])
    tail = n - 1

    model.add_transition(model.start, unit_start, 1)
    model.add_transition(unit_end, model.end, 1)

    model.add_transition(unit_start, deletes[0], 0.01)
    model.add_transition(unit_start, inserts[0], 0.01)
    # Spread the entry mass evenly over every match column: this is the
    # one structural difference that turns the profile into a suffix matcher.
    for pos in range(n):
        model.add_transition(unit_start, matches[pos], 0.98 / n)

    model.add_transition(inserts[0], inserts[0], 0.01)
    model.add_transition(inserts[0], deletes[0], 0.01)
    model.add_transition(inserts[0], matches[0], 0.98)

    # Exits from the final column.
    model.add_transition(deletes[tail], unit_end, 0.99)
    model.add_transition(deletes[tail], inserts[tail + 1], 0.01)
    model.add_transition(matches[tail], unit_end, 0.99)
    model.add_transition(matches[tail], inserts[tail + 1], 0.01)
    model.add_transition(inserts[tail + 1], inserts[tail + 1], 0.01)
    model.add_transition(inserts[tail + 1], unit_end, 0.99)

    # Column-to-column wiring (duplicates the tail M->I / D->I / I->I edges
    # on the last iteration, exactly as the original construction does).
    for pos in range(n):
        model.add_transition(matches[pos], inserts[pos + 1], 0.01)
        model.add_transition(deletes[pos], inserts[pos + 1], 0.01)
        model.add_transition(inserts[pos + 1], inserts[pos + 1], 0.01)
        if pos < n - 1:
            model.add_transition(inserts[pos + 1], matches[pos + 1], 0.98)
            model.add_transition(inserts[pos + 1], deletes[pos + 1], 0.01)
            model.add_transition(matches[pos], matches[pos + 1], 0.98)
            model.add_transition(matches[pos], deletes[pos + 1], 0.01)
            model.add_transition(deletes[pos], deletes[pos + 1], 0.01)
            model.add_transition(deletes[pos], matches[pos + 1], 0.98)

    model.bake(merge=None)
    return model
def setup_huge_monty():
    """Build the "huge" Monty Hall Bayesian network test fixture.

    A made-up extension of the Monty Hall problem that deliberately mixes
    boolean, integer and string variables in its probability tables, to
    exercise heterogeneous data types in one network.  All distributions
    and the network itself are published via module-level globals.
    """
    # Build the huge monty hall huge_monty_network. This is an example I made
    # up with which may not exactly flow logically, but tests a varied type of
    # tables ensures heterogeneous types of data work together.
    global huge_monty_network, huge_monty_friend, huge_monty_guest, huge_monty
    global huge_monty_remaining, huge_monty_randomize, huge_monty_prize

    # Root node: whether a friend is present (fair coin, boolean values).
    huge_monty_friend = DiscreteDistribution({True: 0.5, False: 0.5})

    # Guest's door choice, conditioned on the friend (boolean -> string).
    huge_monty_guest = ConditionalProbabilityTable(
        [[True, 'A', 0.50],
         [True, 'B', 0.25],
         [True, 'C', 0.25],
         [False, 'A', 0.0],
         [False, 'B', 0.7],
         [False, 'C', 0.3]], [huge_monty_friend])

    # Number of huge_monty_remaining cars (integer-valued root node).
    huge_monty_remaining = DiscreteDistribution({0: 0.1, 1: 0.7, 2: 0.2, })

    # Whether they huge_monty_randomize is dependent on the number of
    # huge_monty_remaining cars (integer -> boolean).
    huge_monty_randomize = ConditionalProbabilityTable(
        [[0, True, 0.05],
         [0, False, 0.95],
         [1, True, 0.8],
         [1, False, 0.2],
         [2, True, 0.5],
         [2, False, 0.5]], [huge_monty_remaining])

    # Where the huge_monty_prize is depends on if they huge_monty_randomize
    # or not and also the huge_monty_guests huge_monty_friend
    # (boolean x boolean -> string).
    huge_monty_prize = ConditionalProbabilityTable(
        [[True, True, 'A', 0.3],
         [True, True, 'B', 0.4],
         [True, True, 'C', 0.3],
         [True, False, 'A', 0.2],
         [True, False, 'B', 0.4],
         [True, False, 'C', 0.4],
         [False, True, 'A', 0.1],
         [False, True, 'B', 0.9],
         [False, True, 'C', 0.0],
         [False, False, 'A', 0.0],
         [False, False, 'B', 0.4],
         [False, False, 'C', 0.6]],
        [huge_monty_randomize, huge_monty_friend])

    # Monty is dependent on both the huge_monty_guest and the
    # huge_monty_prize: P(monty opens door | guest door, prize door).
    huge_monty = ConditionalProbabilityTable(
        [['A', 'A', 'A', 0.0],
         ['A', 'A', 'B', 0.5],
         ['A', 'A', 'C', 0.5],
         ['A', 'B', 'A', 0.0],
         ['A', 'B', 'B', 0.0],
         ['A', 'B', 'C', 1.0],
         ['A', 'C', 'A', 0.0],
         ['A', 'C', 'B', 1.0],
         ['A', 'C', 'C', 0.0],
         ['B', 'A', 'A', 0.0],
         ['B', 'A', 'B', 0.0],
         ['B', 'A', 'C', 1.0],
         ['B', 'B', 'A', 0.5],
         ['B', 'B', 'B', 0.0],
         ['B', 'B', 'C', 0.5],
         ['B', 'C', 'A', 1.0],
         ['B', 'C', 'B', 0.0],
         ['B', 'C', 'C', 0.0],
         ['C', 'A', 'A', 0.0],
         ['C', 'A', 'B', 1.0],
         ['C', 'A', 'C', 0.0],
         ['C', 'B', 'A', 1.0],
         ['C', 'B', 'B', 0.0],
         ['C', 'B', 'C', 0.0],
         ['C', 'C', 'A', 0.5],
         ['C', 'C', 'B', 0.5],
         ['C', 'C', 'C', 0.0]], [huge_monty_guest, huge_monty_prize])

    # Make the states
    s0 = State(huge_monty_friend, name="huge_monty_friend")
    s1 = State(huge_monty_guest, name="huge_monty_guest")
    s2 = State(huge_monty_prize, name="huge_monty_prize")
    s3 = State(huge_monty, name="huge_monty")
    s4 = State(huge_monty_remaining, name="huge_monty_remaining")
    s5 = State(huge_monty_randomize, name="huge_monty_randomize")

    # Make the bayes net, add the states, and the conditional dependencies.
    huge_monty_network = BayesianNetwork("test")
    huge_monty_network.add_nodes(s0, s1, s2, s3, s4, s5)
    # Edges point parent -> child, matching the CPT parent lists above.
    huge_monty_network.add_transition(s0, s1)
    huge_monty_network.add_transition(s1, s3)
    huge_monty_network.add_transition(s2, s3)
    huge_monty_network.add_transition(s4, s5)
    huge_monty_network.add_transition(s5, s2)
    huge_monty_network.add_transition(s0, s2)
    huge_monty_network.bake()
def spacer_states_maker(quantity, distribution, name): states = [] for i in range(0, quantity): state = State(DiscreteDistribution(distribution), name=name + str(i)) states.append(state) return states