def add_new_state_node(self, t):
    """Append the damage-state node for time step ``t`` to the graph.

    Nothing happens when ``self.variables["states"]`` already holds an entry
    for ``t``. At ``t == 0`` the node is backed by a discrete distribution
    built from the prior factor. At later steps it is backed by a conditional
    probability table whose parents are the previous transition factor and
    the control factor of step ``t - 1`` (the actual control if one has been
    recorded for that step, otherwise the predicted control); the matching
    transition and control edges are then added.
    """
    if len(self.variables["states"]) > t:
        # the state already exists in the graph.
        return

    is_first_step = t == 0
    if is_first_step:
        prior = self.get_prior_factor()
        # add prior factor
        self.factors["transition"].append(pm.DiscreteDistribution(prior))
    else:
        table = self.get_transition_factor()
        # check if we have a controlA
        uses_actual_control = len(self.variables["controlAs"]) > t - 1
        previous_state_factor = self.factors["transition"][t - 1]
        if uses_actual_control:
            control_factor = self.factors["controlA"][t - 1]
        else:
            control_factor = self.factors["controlP"][t - 1]
        self.factors["transition"].append(
            pm.ConditionalProbabilityTable(
                table, [previous_state_factor, control_factor]))

    # add RV as a node in the graph
    self.variables["states"].append(
        pm.State(self.factors["transition"][t], name="Damage {}".format(t)))
    self.add_node(self.variables["states"][t])
    if is_first_step:
        return

    # connect node via transition edge
    self.add_edge(self.variables["states"][t - 1], self.variables["states"][t])
    # connect node via control edge
    control_key = "controlAs" if uses_actual_control else "controlPs"
    self.add_edge(self.variables[control_key][t - 1],
                  self.variables["states"][t])
def get_states(self, data_dict):  # todo check
    """
    Return dicts of pomegranate states with initialized emission distributions.

    When no trace in ``self.data.manual_table`` is labeled, class assignments
    come from a k-means fit (on at most 1000 randomly chosen rows) and an edge
    distribution is estimated from the union of the two clusters involved.
    Otherwise class assignments come from ``self.data.label_dict`` for traces
    that are labeled and not junk, and an edge distribution is estimated from
    the rows whose edge label is ``e{s1}_{s2}``.

    :param data_dict: mapping of trace index to a table from which the columns
        in ``self.feature_list`` are taken
    :return: tuple ``(states, edge_states, pg_gui_state_dict)`` where
        ``states`` maps ``s{i}`` to a state, ``edge_states`` maps
        ``e{i}_{j}`` to ``[list of buffer states, (source name, target name)]``
        and ``pg_gui_state_dict`` maps every created state name to a class
        index
    """
    left_buffer = self.buffer // 2
    if not any(self.data.manual_table.is_labeled):
        # Estimate emission distributions (same as pomegranate does usually)
        data_vec = np.concatenate([dat.loc[:, self.feature_list].to_numpy() for dat in data_dict.values()], 0)
        if data_vec.shape[0] > 1000:  # avoid endless waiting for k-means guess in large dataset
            km_idx = np.random.choice(data_vec.shape[0], 1000, replace=False)
        else:
            km_idx = np.arange(data_vec.shape[0])
        km = Kmeans(k=self.nb_states, n_init=1).fit(X=data_vec[km_idx, :])
        y = km.predict(data_vec)

        def distfun(s1, s2):
            return self.get_dist(data_vec[np.logical_or(y == s1, y == s2), :].T)
    else:
        # Estimate emission distributions from given class labels
        labeled_indices = self.data.manual_table.query('is_labeled and not is_junk').index
        data_vec = np.concatenate([data_dict[idx].loc[:, self.feature_list].to_numpy()
                                   for idx in data_dict if idx in labeled_indices], 0)
        y = np.concatenate([self.data.label_dict[idx]
                            for idx in data_dict if idx in labeled_indices], 0)
        y_edge = np.concatenate([get_edge_labels(self.data.label_dict[idx].astype(int), self.buffer)
                                 for idx in data_dict if idx in labeled_indices], 0)

        def distfun(s1, s2):
            return self.get_dist(data_vec[y_edge == f'e{s1}_{s2}', :].T)

    # Create states
    pg_gui_state_dict = dict()
    states = dict()
    for i in range(self.nb_states):
        sn = f's{i}'
        # Select (and copy) the rows of this class once and reuse them below.
        X = data_vec[y == i, :].T.copy()
        if X.size == 0 and self.trained is not None:
            # No examples for this class: fall back to the state of the
            # previously trained model.
            states[sn] = self.trained.states[self.str2num_state_dict[sn]]
        else:
            states[sn] = pg.State(self.get_main_dist(X), name=sn)
        pg_gui_state_dict[sn] = i

    # Create edge states: one chain of `self.buffer` states for every ordered
    # pair of classes. Each buffer state gets its own distribution object.
    edge_states = dict()
    for edge in permutations(range(self.nb_states), 2):
        sn = f'e{edge[0]}_{edge[1]}'
        estates_list = list()
        for i in range(self.buffer):
            estates_list.append(pg.State(distfun(*edge), name=f'e{edge[0]}_{edge[1]}_{i}'))
            # The first half of the buffer is shown as the source class, the
            # rest as the target class.
            pg_gui_state_dict[f'{sn}_{i}'] = int(edge[0]) if i < left_buffer else int(edge[1])
        edge_states[sn] = [estates_list, (f's{edge[0]}', f's{edge[1]}')]
    return states, edge_states, pg_gui_state_dict
def hmm(nstates=2, bias=0.1):
    """Build and bake a fully connected HMM over the binary alphabet {0, 1}.

    State 0 emits symbol 0 with probability ``bias``; every other state emits
    symbol 0 with probability ``1 - bias``. Transition rows are drawn from
    ``np.random.rand`` and normalised to sum to one; every state is entered
    from the start state with weight ``1 / nstates``.
    """
    def emission_pair(state_index):
        # (P(symbol 0), P(symbol 1)) for the given state.
        if state_index == 0:
            return bias, 1 - bias
        return 1 - bias, bias

    states = []
    for idx in range(nstates):
        p_zero, p_one = emission_pair(idx)
        states.append(
            pmg.State(pmg.DiscreteDistribution({0: p_zero, 1: p_one}),
                      name='S%d' % idx))

    # Random row-stochastic transition matrix.
    trans = np.random.rand(nstates, nstates)
    for row in range(nstates):
        trans[row] = trans[row] / trans[row].sum()

    model = pmg.HiddenMarkovModel()
    model.add_states(states)
    for src in range(nstates):
        for dst in range(nstates):
            model.add_transition(states[src], states[dst], trans[src, dst])
        model.add_transition(model.start, states[src], 1.0 / nstates)
    model.bake()
    return model
def _fit_word_model(X, nstates, **kwargs):
    """Fit a chain-shaped HMM with ``nstates`` states and return its edges.

    The start and end states are named ``"-1"`` and ``str(nstates)`` so that
    every state name parses as an integer. ``kwargs`` are forwarded to
    ``HiddenMarkovModel.fit``.

    Returns a list of ``(source index, target index, probability)`` tuples,
    one per edge of the fitted model.

    Raises ``ValueError`` when the improvement returned by ``fit`` is NaN.
    """
    wmodel = pomegranate.HiddenMarkovModel(None)
    wmodel.start.name = str(-1)
    wmodel.end.name = str(nstates)

    states = [
        pomegranate.State(PrecomputedDistribution(idx, nstates), name=str(idx))
        for idx in range(nstates)
    ]

    # Self loops.
    for state in states:
        wmodel.add_state(state)
        wmodel.add_transition(state, state, 0.8)

    wmodel.add_transition(wmodel.start, states[0], 1)

    # Step to the next state of the chain.
    for previous, following in zip(states, states[1:]):
        wmodel.add_transition(previous, following, 0.15)
    wmodel.add_transition(states[-1], wmodel.end, 0.15)

    # NOTE(review): this edge goes from the second-to-last state back to
    # state 1; confirm whether `wmodel.end` was the intended target.
    wmodel.add_transition(states[-2], states[1], 0.05)
    # Skip over one state.
    for idx in range(2, nstates - 1):
        wmodel.add_transition(states[idx - 2], states[idx], 0.05)

    wmodel.bake()

    improvement = wmodel.fit(X, **kwargs)
    if np.isnan(improvement):
        raise ValueError
    print("HMM improvement: {:2.4f}".format(improvement))

    edges = []
    for edge in wmodel.graph.edges(data=True):
        source, target, attributes = edge[0], edge[1], edge[2]
        edges.append((int(source.name), int(target.name),
                      np.exp(attributes['probability'])))
    return edges
def __set_target_probability(self):
    """Rebuild the per-action target CPTs and their pomegranate states.

    For every action other than ``action.Action.ST`` the probability of the
    target variable being ``'T'`` given direction ``'T'`` is 0.5 when no
    target vector is set; otherwise it is
    ``threshold + (1 - threshold) * max(0, dot)``, where ``dot`` is the dot
    product of the normalised target vector and the normalised direction
    vector of that action (also stored in ``self.__target_dots``). For the
    ``ST`` action the dot product is recorded as 0 and the table is uniform.
    Given direction ``'F'`` the table is always uniform.
    """
    num_actions = action.Action.num_actions
    for i in range(num_actions):
        prob_value = 0.5
        if i == action.Action.ST:
            self.__target_dots[i] = 0
        # Identity test: `!= None` would dispatch to whatever comparison
        # operator the vector type defines.
        elif self.__target_vec is not None:
            direction_vec = action.VECTOR[i]
            # NOTE(review): normalize() is called on the object stored in
            # action.VECTOR, i.e. presumably in place on shared data - confirm.
            direction_vec.normalize()
            self.__target_vec.normalize()
            dot = self.__target_vec.dot_product(direction_vec)
            self.__target_dots[i] = dot
            prob_value = self.__target_threshold + (
                1.0 - self.__target_threshold) * max(0, dot)

        self.__d_target[i] = pm.ConditionalProbabilityTable(
            [['T', 'T', prob_value], ['T', 'F', 1.0 - prob_value],
             ['F', 'T', 0.5], ['F', 'F', 0.5]], [self.__d_direction[i]])

    self.__s_target = [
        pm.State(self.__d_target[i], name='target_' + str(i))
        for i in range(num_actions)
    ]
def _add_network_states(self, conditional_probability_tables, bayesian_network): states = {} for key, cpt in conditional_probability_tables.iteritems(): state = pg.State(cpt, name=key) bayesian_network.add_state(state) states[key] = state return states
def probDiagnose(percList):
    """
    Query a two-node Bayesian network (symptom category -> diagnosis
    category) once per entry of ``percList`` and collect the resulting
    diagnosis beliefs.

    Parameters
    ----------
    percList: list
        List of three tuples; each tuple holds a symptom category and its
        percentage.

    Returns
    -------
    condProbList: list
        One entry per element of ``percList``: ``parameters[0]`` of the
        second belief returned by ``predict_proba`` (the diagnosis node).
    """
    import pomegranate as pg

    # Prior over symptom categories.
    sym = pg.DiscreteDistribution({
        'gen': 192. / 389,
        'sup': 125. / 389,
        'inf': 72. / 389
    })

    # P(diagnosis | symptom), grouped by symptom category.
    diagnosis_given_symptom = [
        ('gen', [('gen', 0.5), ('sup', 0.25), ('inf', 0.25)]),
        ('sup', [('gen', 0.20), ('sup', 0.75), ('inf', 0.05)]),
        ('inf', [('gen', 0.2), ('sup', 0.05), ('inf', 0.75)]),
    ]
    cpt_rows = [[symptom, diagnosis, probability]
                for symptom, outcomes in diagnosis_given_symptom
                for diagnosis, probability in outcomes]
    diagn = pg.ConditionalProbabilityTable(cpt_rows, [sym])

    sym_state = pg.State(sym, name="sym")
    diagn_state = pg.State(diagn, name="diagn")

    model = pg.BayesianNetwork("Diagnose finder")
    model.add_states(sym_state, diagn_state)
    model.add_edge(sym_state, diagn_state)
    model.bake()

    # NOTE(review): the evidence passed for 'sym' is element 1 of each tuple,
    # which the docstring describes as the percentage rather than the
    # category - confirm against the caller.
    return [
        model.predict_proba({'sym': entry[1]})[1].parameters[0]
        for entry in percList
    ]
def add_new_obs_node(self, t, sensor_measurement):
    """Attach the observation node for time step ``t`` and record its evidence.

    A conditional probability table is built from the observation factor of
    ``sensor_measurement`` with the transition factor of step ``t`` as its
    parent. The measurement itself is stored JSON-encoded in
    ``self.evidence`` under the node's name, and the node is linked to the
    state node of step ``t``.
    """
    table = self.get_observation_factor(sensor_measurement)
    parent_factor = self.factors["transition"][t]
    self.factors["observation"].append(
        pm.ConditionalProbabilityTable(table, [parent_factor]))

    node_name = "Observation {}".format(t)
    self.variables["observations"].append(
        pm.State(self.factors["observation"][t], name=node_name))
    self.evidence[node_name] = json.dumps(sensor_measurement)

    # add RV as a node in the graph
    observation_node = self.variables["observations"][t]
    self.add_node(observation_node)
    # connect node via observation edge
    self.add_edge(self.variables["states"][t], observation_node)
def get_states(self, data_dict):  # todo check
    """
    Return dicts of pomegranate states with initialized normal multivariate
    distributions. In supervised mode, do not return a state if no examples
    are available!

    Classes with fewer than two examples get no state, and an edge is only
    created when BOTH of its end states exist.

    :param data_dict: mapping of trace index to a table from which the columns
        in ``self.feature_list`` are taken
    :return: tuple ``(states, edge_states, pg_gui_state_dict, gm_dict)`` where
        ``states`` maps ``s{i}`` to the value returned by
        ``self.get_substates``, ``edge_states`` maps ``e{i}{j}`` to
        ``[list of buffer states, (source name, target name)]``,
        ``pg_gui_state_dict`` maps every created state name to a class index
        and ``gm_dict`` maps ``s{i}`` to the mixture model returned by
        ``self.get_substates``
    """
    left_buffer = self.buffer // 2
    if not any(self.data.manual_table.is_labeled):
        # Estimate emission distributions (same as pomegranate does usually)
        data_vec = np.concatenate([dat.loc[:, self.feature_list].to_numpy() for dat in data_dict.values()], 0)
        if data_vec.shape[0] > 1000:  # avoid endless waiting for k-means guess in large dataset
            km_idx = np.random.choice(data_vec.shape[0], 1000, replace=False)
        else:
            km_idx = np.arange(data_vec.shape[0])
        km = Kmeans(k=self.nb_states, n_init=1).fit(X=data_vec[km_idx, :])
        y = km.predict(data_vec)
        # if 'E_FRET' in self.feature_list:  # order found clusters on E_FRET value if possible
        #     efret_centroids = km.centroids[:, [True if feat == 'E_FRET' else False for feat in self.feature_list]].squeeze()
        #     kml_dict = {ol: nl for ol, nl in zip(np.arange(self.nb_states), np.argsort(efret_centroids))}
        #     y = np.vectorize(kml_dict.__getitem__)(y)

        def distfun(s1, s2):
            return self.get_dist(data_vec[np.logical_or(y == s1, y == s2), :].T)
    else:
        # Estimate emission distributions from given class labels
        labeled_indices = self.data.manual_table.query('is_labeled').index
        data_vec = np.concatenate([data_dict[idx].loc[:, self.feature_list].to_numpy()
                                   for idx in data_dict if idx in labeled_indices], 0)
        y = np.concatenate([self.data.label_dict[idx]
                            for idx in data_dict if idx in labeled_indices], 0)
        y_edge = np.concatenate([get_edge_labels(self.data.label_dict[idx].astype(int), self.buffer)
                                 for idx in data_dict if idx in labeled_indices], 0)

        def distfun(s1, s2):
            return self.get_dist(data_vec[y_edge == f'e{s1}{s2}', :].T)

    # Create states
    pg_gui_state_dict = dict()
    states = dict()
    gm_dict = dict()
    for i in range(self.nb_states):
        sn = f's{i}'
        if np.sum(y == i) < 2:
            # Not enough examples to estimate this class: leave it out.
            continue
        states[sn], added_state_names, gm = self.get_substates(data_vec[y == i, :].T, state_name=sn)
        gm_dict[sn] = gm
        for asn in added_state_names:
            pg_gui_state_dict[asn] = i

    # Create edge states
    edges = list(permutations(np.unique(y.astype(int)), 2))
    edge_states = dict()
    for edge in edges:
        source_name, target_name = f's{edge[0]}', f's{edge[1]}'
        # Both ends of the edge must have a state. The previous check tested
        # the source twice and never the target.
        if source_name not in states or target_name not in states:
            continue
        sn = f'e{edge[0]}{edge[1]}'
        estates_list = list()
        for i in range(self.buffer):
            estates_list.append(pg.State(distfun(*edge), name=f'e{edge[0]}{edge[1]}_{i}'))
            # The first half of the buffer is shown as the source class, the
            # rest as the target class.
            pg_gui_state_dict[f'{sn}_{i}'] = int(edge[0]) if i < left_buffer else int(edge[1])
        edge_states[sn] = [estates_list, (source_name, target_name)]
    return states, edge_states, pg_gui_state_dict, gm_dict
def _solve_bayes_network(cpts, conditionals=None):
    """Build and bake a pomegranate Bayesian network from per-variable tables.

    :param cpts: mapping of variable name to an object exposing ``to_dict()``
        (column -> {value -> probability})
    :param conditionals: collection of variable names to model as conditional
        nodes; ``None`` is treated as "no conditional nodes"
    :return: the baked ``BayesianNetwork``

    Every name not in ``conditionals`` yields one discrete-distribution state
    per column of its table. Every name in ``conditionals`` yields one state
    backed by a conditional probability table whose parents are all discrete
    distributions created so far.
    """
    print(f'cpts: {cpts}')
    # Previously this line printed `cpts` a second time.
    print(f'conditionals: {conditionals}')
    if conditionals is None:
        # Avoid `name not in None` (TypeError) for the default argument.
        conditionals = ()

    model = pmg.BayesianNetwork("User Produced Model")
    states = []
    distributions = []
    # NOTE(review): `cond` is shared by all conditional nodes, so rows
    # accumulate from one conditional node to the next - confirm intended.
    cond = []
    _cond_stage = []

    def _translator(string):
        # Maps column key 0 / '0' to 'True' and 1 / '1' to 'False'.
        if string == 0 or string == '0':
            return 'True'
        elif string == 1 or string == '1':
            return 'False'
        else:
            return None

    for i, name in enumerate(cpts.keys()):
        temp_dict = cpts[name].to_dict()
        if name not in conditionals:
            for column in temp_dict.values():
                dist = pmg.DiscreteDistribution(column)
                distributions.append(dist)
                states.append(pmg.State(dist, name=name))
        else:
            _cond_stage.append(i)
            for col, column in temp_dict.items():
                for val, probability in column.items():
                    cond.append([_translator(col), val, probability])
            print(f'cond: {cond}')
            states.append(
                pmg.State(pmg.ConditionalProbabilityTable(cond, distributions),
                          name=name))

    # NOTE(review): `_cond_stage` holds positions within `cpts`, but is used
    # here to index `states`; the two only line up when every non-conditional
    # table contributes exactly one state - confirm.
    for i, s in enumerate(states):
        print(f'i: {i}')
        print(f's: {s}')
        model.add_states(s)
        if i not in _cond_stage and _cond_stage:
            model.add_edge(s, states[_cond_stage[0]])
    model.bake()
    return model
def generate_model(state, transition):
    """Build and bake a two-state HMM over the symbols ``'A'`` and ``'B'``.

    State ``A`` emits ``'A'`` with probability ``state`` and state ``B`` emits
    ``'B'`` with probability ``state``. Each state switches to the other with
    weight ``transition``, stays with weight ``1 - transition`` and has an
    edge of weight 0.5 to the end state; both states are entered from the
    start state with weight 0.5.
    """
    # Setup hmm
    model = pomegranate.HiddenMarkovModel()

    emit_a = pomegranate.DiscreteDistribution({'A': state, 'B': 1 - state})
    A = pomegranate.State(emit_a, name='A')
    emit_b = pomegranate.DiscreteDistribution({'A': 1 - state, 'B': state})
    B = pomegranate.State(emit_b, name='B')

    stay = 1 - transition
    weighted_edges = (
        (model.start, A, 0.5),
        (model.start, B, 0.5),
        (A, A, stay),
        (A, B, transition),
        (B, A, transition),
        (B, B, stay),
        (A, model.end, 0.5),
        (B, model.end, 0.5),
    )
    for source, target, weight in weighted_edges:
        model.add_transition(source, target, weight)

    model.bake(verbose=False)
    return model
def add_new_ref_obs_node(self, t):
    """Attach the reference-observation node for time step ``t``.

    Nothing happens when ``self.variables["ref_observations"]`` already holds
    an entry for ``t``. Otherwise a conditional probability table is built
    from ``self.Q_factor`` with the transition factor of step ``t`` as its
    parent, wrapped in a state, added to the graph and linked to the state
    node of step ``t``.
    """
    if len(self.variables["ref_observations"]) > t:
        # node is already in the graph
        return

    parent_factor = self.factors["transition"][t]
    self.factors["ref_observation"].append(
        pm.ConditionalProbabilityTable(self.Q_factor, [parent_factor]))
    self.variables["ref_observations"].append(
        pm.State(self.factors["ref_observation"][t],
                 name="Ref. Observation {}".format(t)))

    # add RV as a node in the graph
    ref_node = self.variables["ref_observations"][t]
    self.add_node(ref_node)
    # connect node via ref observation edge
    self.add_edge(self.variables["states"][t], ref_node)
def run():
    """Train a layered discrete HMM on a pickled dataset and print its score.

    Reads the module-level names ``datasetload`` (file name inside
    ``datasets/``), ``bond_dimension`` (number of hidden states per column)
    and ``n_iter`` (maximum number of Baum-Welch iterations). The model has
    one layer of ``bond_dimension`` states per data column; consecutive
    layers are fully connected with random initial weights. Prints the mean
    negative log-likelihood of the training sequences and the elapsed time.
    """
    # Load dataset
    path = 'datasets/'
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only load datasets that come from a trusted source.
    with open(path + datasetload, 'rb') as f:
        a = pickle.load(f)
    X = a[0]
    X = X.astype(int)

    # Create HMM
    D = bond_dimension
    N = X.shape[1]
    d = np.max(X + 1)  # number of distinct symbols (largest value + 1)
    list_of_states = []
    for i in range(N):
        list_of_states.append([])
        for u in range(bond_dimension):
            # Random (unnormalised) initial emission weights per symbol.
            dictionnary = {str(l): np.random.rand() for l in range(d)}
            list_of_states[i].append(
                pomegranate.State(
                    pomegranate.DiscreteDistribution(dictionnary)))

    model = pomegranate.HiddenMarkovModel()
    # Fully connect each layer to the next one. The loop variables are named
    # so that they no longer overwrite the symbol count `d`.
    for i in range(N - 1):
        for src in range(D):
            for dst in range(D):
                model.add_transition(list_of_states[i][src],
                                     list_of_states[i + 1][dst],
                                     np.random.rand())
    for src in range(D):
        model.add_transition(model.start, list_of_states[0][src],
                             np.random.rand())
    for src in range(D):
        model.add_transition(list_of_states[N - 1][src], model.end,
                             np.random.rand())
    model.bake()

    # Train HMM
    begin = time.time()
    sequencetrain = [[str(i) for i in v] for v in X]
    np.random.seed()
    model.fit(sequencetrain, algorithm='baum-welch', stop_threshold=1e-50,
              min_iterations=1000, max_iterations=n_iter)

    u = 0
    for i in sequencetrain:
        u += model.log_probability(i)
    # Mean negative log-likelihood per sequence (not a classification accuracy).
    accuracy = -u / len(sequencetrain)
    time_elapsed = time.time() - begin
    print("Negative log likelihood = %.3f" % (accuracy))
    print("Time elapsed = %.2fs" % (time_elapsed))
def fit_hmm(self):
    """Build a four-state Gaussian HMM, fit it to ``self.accels_filt`` and
    store it in ``self.model``.

    The states are named ``'0'`` to ``'3'`` and emit 3-dimensional
    multivariate Gaussians; the initial transition weights favour staying in
    the current state.
    """
    print('Fitting Model')

    correlated_cov = .1 * np.eye(3) + .1 * np.ones([3, 3])
    s0 = pg.State(pg.MultivariateGaussianDistribution(
        np.array([1, 1, 1]), .1 * np.eye(3)), name='0')
    s1 = pg.State(pg.MultivariateGaussianDistribution(
        np.array([1, 1, 1]), 3 * np.eye(3)), name='1')
    s2 = pg.State(pg.MultivariateGaussianDistribution(
        np.array([.5, .5, .5]), correlated_cov.copy()), name='2')
    s3 = pg.State(pg.MultivariateGaussianDistribution(
        np.array([1.5, 1.5, 1.5]), correlated_cov.copy()), name='3')
    hidden = [s0, s1, s2, s3]

    model = pg.HiddenMarkovModel()
    model.add_states(hidden)

    # One row of outgoing weights per source, in the order s0, s1, s2, s3.
    outgoing = [
        (model.start, (.85, .05, .05, .05)),
        (s0, (.85, .05, .05, .05)),
        (s1, (.1, .7, .1, .1)),
        (s2, (.1, .1, .7, .1)),
        (s3, (.1, .1, .1, .7)),
    ]
    for source, weights in outgoing:
        for target, weight in zip(hidden, weights):
            model.add_transition(source, target, weight)

    model.bake()
    model.fit(self.accels_filt)
    self.model = model
def get_substates(self, vec, state_name):
    """Split one class into Gaussian sub-states using a mixture model.

    Columns of ``vec`` containing NaN are dropped, a Gaussian mixture with at
    most two components is fitted on the remaining columns (one column is one
    sample), and one pomegranate state is created per component.

    :param vec: 2-D array; each column is treated as one sample
    :param state_name: prefix for the sub-state names (``{state_name}_{n}``)
    :return: tuple ``(list of states, list of their names, fitted mixture)``
    """
    has_nan = np.any(np.isnan(vec), axis=0)
    vec_clean = vec[:, np.invert(has_nan)]
    # Never ask for more components than there are samples.
    nb_clust = min(2, vec_clean.shape[1])
    gm = GaussianMixture(n_components=nb_clust).fit(vec_clean.T)
    # Small diagonal term added to every component covariance.
    gm.covariances_ += np.eye(gm.covariances_.shape[1]) * 1E-9

    added_state_names = [f'{state_name}_{str(n)}' for n in range(nb_clust)]
    st_list = [
        pg.State(
            pg.MultivariateGaussianDistribution(gm.means_[n, :],
                                                gm.covariances_[n, :, :]),
            name=sub_name)
        for n, sub_name in enumerate(added_state_names)
    ]
    return st_list, added_state_names, gm
def make_hmm_model(emission_mat, transition_probs):
    """Build and bake a two-state ('0' baseline / '1' ictal) discrete HMM.

    :param emission_mat: 2-D array; row 0 holds the baseline emission
        probabilities and row 1 the ictal ones, indexed by symbol
    :param transition_probs: 2x2 array indexed ``[from, to]`` with 0 for
        baseline and 1 for ictal
    :return: the baked ``HiddenMarkovModel`` named ``'ndf'``
    """
    def emissions_of(row):
        # Symbol index -> probability for one row of the emission matrix.
        return {symbol: emission_mat[row, symbol]
                for symbol in range(emission_mat.shape[1])}

    model = pomegranate.HiddenMarkovModel('ndf')

    ictal_emissions = emissions_of(1)
    baseline_emissions = emissions_of(0)

    ictal = pomegranate.State(
        pomegranate.DiscreteDistribution(ictal_emissions), name='1')
    baseline = pomegranate.State(
        pomegranate.DiscreteDistribution(baseline_emissions), name='0')

    for hidden_state in (ictal, baseline):
        model.add_state(hidden_state)

    # NOTE(review): the two start weights (0.05 and 99.95) sum to 100, not 1;
    # presumably they rely on the outgoing weights being normalised when the
    # model is baked - confirm.
    model.add_transition(model.start, ictal, 0.05)
    model.add_transition(model.start, baseline, 99.95)

    model.add_transition(baseline, baseline, transition_probs[0, 0])
    model.add_transition(baseline, ictal, transition_probs[0, 1])
    model.add_transition(ictal, ictal, transition_probs[1, 1])
    model.add_transition(ictal, baseline, transition_probs[1, 0])

    model.bake(verbose=False)
    return model
def add_new_controlP_node(self, t):
    """Attach the 'predicted' (estimated) control node for time step ``t``.

    Nothing happens when ``self.variables["controlPs"]`` already holds an
    entry for ``t``. Otherwise a conditional probability table is built from
    the predicted-control factor with the transition factor of step ``t`` as
    its parent, wrapped in a state, added to the graph and linked to the
    state node of step ``t``.
    """
    if len(self.variables["controlPs"]) > t:
        # node is already in the graph
        return

    table = self.get_controlP_factor()
    parent_factor = self.factors["transition"][t]
    self.factors["controlP"].append(
        pm.ConditionalProbabilityTable(table, [parent_factor]))
    self.variables["controlPs"].append(
        pm.State(self.factors["controlP"][t], name="ControlP {}".format(t)))

    # add RV as a node in the graph
    control_node = self.variables["controlPs"][t]
    self.add_node(control_node)
    # connect the state node to its predicted control node
    self.add_edge(self.variables["states"][t], control_node)
def add_new_controlA_node(self, t, control):
    """Attach the 'actual' (enacted) control node for time step ``t``.

    A discrete distribution is built from the actual-control factor of
    ``control``, wrapped in a state and added to the graph. If the state node
    of step ``t + 1`` already exists, its transition factor and state are
    re-initialised in place so that they depend on this actual control, and
    the edge that ran from ``ControlP t`` to ``Damage t+1`` is replaced by an
    edge from the new node.
    """
    factor = self.get_controlA_factor(control)
    self.factors["controlA"].append(pm.DiscreteDistribution(factor))
    self.variables["controlAs"].append(
        pm.State(self.factors["controlA"][t], name="ControlA {}".format(t)))
    # add RV as a node in the graph
    self.add_node(self.variables["controlAs"][t])

    nxt = t + 1
    if not len(self.variables["states"]) > nxt:
        return

    table = self.get_transition_factor()
    # Re-run the constructors on the existing objects so that references to
    # them held elsewhere stay valid.
    self.factors["transition"][nxt].__init__(
        table, [self.factors["transition"][t], self.factors["controlA"][t]])
    self.variables["states"][nxt].__init__(
        self.factors["transition"][nxt], name="Damage {}".format(nxt))

    # Swap the predicted-control parent edge for the actual-control one.
    for position, (parent, child) in enumerate(self.edges):
        is_predicted_control = parent.name == "ControlP {}".format(t)
        if is_predicted_control and child.name == "Damage {}".format(nxt):
            self.edges[position] = (self.variables["controlAs"][t],
                                    self.variables["states"][nxt])
def create_states(model, hidden_sequence, state_names):
    """Add one state per name to ``model`` and wire up its transitions.

    Emission distributions are taken from the module-level ``dist`` sequence
    at the position of each name within ``state_names``. Transition
    probabilities come from a Markov chain estimated on ``hidden_sequence``.
    A note is appended to the module-level ``poznamka`` list: an empty string
    when the start transition to ``'Wake'`` could be added, otherwise
    ``'nezacina wake'``.
    """
    chain_model = pg.MarkovChain.from_samples([hidden_sequence])

    states = {
        name: pg.State(dist[state_names.index(name)], name=name)
        for name in state_names
    }  # type: Dict[str, pg.State]
    model.add_states(list(states.values()))

    # sets the starting probability for state 'Wake' to 1.0
    try:
        model.add_transition(model.start, states['Wake'], 1.0)
        poznamka.append("")
    except KeyError:
        # There is no 'Wake' state to start from.
        print("nezacina wake")
        poznamka.append('nezacina wake')

    # insert the transition probabilities estimated by the Markov chain;
    # each row is (source name, target name, probability)
    for row in chain_model.distributions[1].parameters[0]:
        model.add_transition(states[row[0]], states[row[1]], row[2])
def create_state(data_init, n_mix, dim_feature, name_phn, name_state, covar_type='full'):
    """Create a pomegranate state backed by a Gaussian mixture model.

    :param data_init: initialisation data; only used for ``covar_type='diag'``
    :param n_mix: number of mixture components
    :param dim_feature: feature dimensionality
    :param name_phn: first part of the state name
    :param name_state: second part of the state name
    :param covar_type: ``'full'`` or ``'diag'``
    :return: state named ``name_phn + name_state``
    :raises ValueError: if ``covar_type`` is neither ``'full'`` nor ``'diag'``
    """
    if covar_type not in ('full', 'diag'):
        raise ValueError("{} is not a valid covar type.".format(covar_type))

    if covar_type == 'full':
        mixtures = create_mixture_full_covar(n_mix=n_mix,
                                             dim_feature=dim_feature)
    else:
        mixtures = create_mixture_diag_covar(data_init=data_init,
                                             n_mix=n_mix,
                                             dim_feature=dim_feature)

    emission = pomegranate.GeneralMixtureModel(mixtures)
    return pomegranate.State(emission, name=name_phn + name_state)
def get_substate_object(self, vec, state_name):
    """Model one class as a small HMM of parallel Gaussian sub-states.

    Columns of ``vec`` containing NaN are dropped and a Gaussian mixture with
    at most ten components is fitted on the remaining columns (one column is
    one sample). Every component becomes a state that is entered from the
    sub-model's start with the component weight and leaves to its end with
    weight 1.0; both transitions carry a pseudocount of 9999999.

    :param vec: 2-D array; each column is treated as one sample
    :param state_name: name of the sub-model and prefix of its state names
    :return: tuple ``(unbaked HiddenMarkovModel, list of sub-state names)``
    """
    nan_columns = np.any(np.isnan(vec), axis=0)
    vec_clean = vec[:, np.invert(nan_columns)]
    # Never ask for more components than there are samples.
    nb_clust = min(10, vec_clean.shape[1])
    gm = GaussianMixture(n_components=nb_clust).fit(vec_clean.T)
    # Small diagonal term added to every component covariance.
    gm.covariances_ += np.eye(gm.covariances_.shape[1]) * 1E-9

    hmm_out = pg.HiddenMarkovModel()
    hmm_out.name = state_name
    hmm_out.start.name = f'{state_name}_start'
    hmm_out.end.name = f'{state_name}_end'

    added_state_names = []
    for component in range(nb_clust):
        sub_name = f'{state_name}_{str(component)}'
        added_state_names.append(sub_name)
        emission = pg.MultivariateGaussianDistribution(
            gm.means_[component, :], gm.covariances_[component, :, :])
        sub_state = pg.State(emission, name=sub_name)
        hmm_out.add_state(sub_state)
        hmm_out.add_transition(hmm_out.start, sub_state,
                               gm.weights_[component], pseudocount=9999999)
        hmm_out.add_transition(sub_state, hmm_out.end, 1.0,
                               pseudocount=9999999)
    return hmm_out, added_state_names
def __init__(self, map_manager, agent_type, sampling=False):
    """Create the per-action distributions and states of the Bayesian net.

    For every action a uniform ``'T'``/``'F'`` direction prior is created.
    For every random variable enabled in ``self._considered`` one conditional
    probability table per action is created, conditioned on that action's
    direction prior, together with a named pomegranate state. ``'danger'`` is
    disabled for seekers. The hider / seeker tables depend on ``agent_type``.
    The target tables are only allocated here, not filled.
    """
    n_actions = action.Action.num_actions

    def cpts_per_direction(make_rows):
        # One CPT per action; make_rows() supplies the rows for each CPT.
        return [
            pm.ConditionalProbabilityTable(make_rows(),
                                           [self.__d_direction[k]])
            for k in range(n_actions)
        ]

    def named_states(distributions, prefix):
        # One state per action, named '<prefix><action index>'.
        return [
            pm.State(distributions[k], name=prefix + str(k))
            for k in range(n_actions)
        ]

    def four_level_rows():
        # Table shared by 'obstruction' and 'visibility'.
        return [['T', '0', 0.001], ['T', '1', 0.003], ['T', '2', 0.006],
                ['T', '3', 0.99], ['F', '0', 1. / 4], ['F', '1', 1. / 4],
                ['F', '2', 1. / 4], ['F', '3', 1. / 4]]

    self.__map_manager = map_manager
    self.__position = None
    self.__percept = None
    self.__action_map = None
    self.__target_threshold = 0.3
    self.__max_prob_dir = None

    # target dot products
    self.__target_dots = [None] * n_actions

    # Random variables marked as true will be considered in the bayesian net
    self._considered = {
        'target': True,
        'danger': True,
        'obstruction': True,
        'visibility': True,
        'hider': True,
        'seeker': True,
        'blockage': True
    }
    if agent_type == agent.AgentType.Seeker:
        self._considered['danger'] = False

    self.__sampling = sampling

    # Probability distributions
    self.__d_direction = [
        pm.DiscreteDistribution({'T': 0.5, 'F': 0.5})
        for _ in range(n_actions)
    ]
    self.__s_direction = named_states(self.__d_direction, 'direction_')

    # Random vars, probability distributions and state vars of considered
    # variables in the bayesian net
    if self._considered['target']:
        self.__r_target = [None] * n_actions
        self.__d_target = [None] * n_actions
        self.__s_target = None

    if self._considered['danger']:
        self.__r_danger = [None] * n_actions
        self.__d_danger = cpts_per_direction(
            lambda: [['T', '0', 0.99], ['T', '1', 0.01],
                     ['F', '0', 0.5], ['F', '1', 0.5]])
        self.__s_danger = named_states(self.__d_danger, 'danger_')

    if self._considered['obstruction']:
        self.__r_obstruction = [None] * n_actions
        self.__d_obstruction = cpts_per_direction(four_level_rows)
        self.__s_obstruction = named_states(self.__d_obstruction,
                                            'obstruction_')

    if self._considered['visibility']:
        self.__r_visibility = [None] * n_actions
        self.__d_visibility = cpts_per_direction(four_level_rows)
        self.__s_visibility = named_states(self.__d_visibility,
                                           'visibility_')

    cpt_a = [['T', '0', 0.9], ['T', '1', 0.066], ['T', '2', 0.033],
             ['F', '0', 1. / 3], ['F', '1', 1. / 3], ['F', '2', 1. / 3]]
    cpt_b = [['T', '0', 0.9], ['T', '1', 0.077], ['T', '2', 0.022],
             ['F', '0', 1. / 3], ['F', '1', 1. / 3], ['F', '2', 1. / 3]]
    target_cpt = None

    if self._considered['hider']:
        # A hider uses cpt_a for hiders, a seeker uses cpt_b.
        if agent_type == agent.AgentType.Hider:
            target_cpt = cpt_a
        elif agent_type == agent.AgentType.Seeker:
            target_cpt = cpt_b
        self.__r_hider = [None] * n_actions
        self.__d_hider = cpts_per_direction(lambda: target_cpt)
        self.__s_hider = named_states(self.__d_hider, 'hider_')

    if self._considered['seeker']:
        # The assignment is mirrored for the seeker variable.
        if agent_type == agent.AgentType.Hider:
            target_cpt = cpt_b
        elif agent_type == agent.AgentType.Seeker:
            target_cpt = cpt_a
        self.__r_seeker = [None] * n_actions
        self.__d_seeker = cpts_per_direction(lambda: target_cpt)
        self.__s_seeker = named_states(self.__d_seeker, 'seeker_')

    if self._considered['blockage']:
        self.__r_blockage = [None] * n_actions
        self.__d_blockage = cpts_per_direction(
            lambda: [['T', '0', 0.999999], ['T', '1', 0.000001],
                     ['F', '0', 0.5], ['F', '1', 0.5]])
        self.__s_blockage = named_states(self.__d_blockage, 'blockage_')

    # Model and inference results; all start out unset.
    self.__model = None
    self.__inferred_results = None
    self.__direction_probs = [None] * n_actions
    self.__direction_dist = None
# Two-state (cold / hot) hidden Markov model, "partie 3".
start_probability = np.array([0.5, 0.5])  # initial probabilities of (cold, hot)
T = np.array([[0.6, 0.4], [0.3, 0.7]])  # temporary transition matrix
B = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]])  # temporary emission matrix
dicoObs = {'fine': 0, 'moyenne': 1, 'epaisse': 2}  # maps observation strings to integers (0, 1 and 2)
dicoState = {'cold': 0, 'hot': 1}

## Creation of the hidden Markov model
model = pg.HiddenMarkovModel(name="partie 3")  # create the instance

# Emission matrix
# Create the 'cold' state with its emission probabilities
cold = pg.State(pg.DiscreteDistribution({'fine': B[0, 0], 'moyenne': B[0, 1], 'epaisse': B[0, 2]}), name='cold')
# Create the 'hot' state with its emission probabilities
hot = pg.State(pg.DiscreteDistribution({'fine': B[1, 0], 'moyenne': B[1, 1], 'epaisse': B[1, 2]}), name='hot')

# Transition matrix
# Initial probabilities: taken from `start_probability` only. They used to be
# added twice, once hard-coded here and once again from `start_probability`.
model.add_transitions(model.start, [cold, hot], [start_probability[0], start_probability[1]])
model.add_transitions(cold, [cold, hot], [T[0, 0], T[0, 1]])  # transitions out of 'cold'
model.add_transitions(hot, [cold, hot], [T[1, 0], T[1, 1]])  # transitions out of 'hot'
B = np.array([[1, 0], [0.8, 0.2], [0, 1]]) # matrice d'émission temporaire dicoObs = { 'sun': 0, 'rain': 1 } # pour transformer les chaines en entier (0,1 et 2) dicoState = {'c.sky': 0, 'cloudy': 1, 'v.cloudy': 2} ## Creation de la chaine de Markov model = pg.HiddenMarkovModel(name="partie 2") # Creation instance # Matrice d'emission # Creation etat beau temps et prob emission sunny = pg.State(pg.DiscreteDistribution({ 'sun': B[0, 0], 'rain': B[0, 1] }), name='c.sky') # Creation etat beau temps et prob emission cloudy = pg.State(pg.DiscreteDistribution({ 'sun': B[1, 0], 'rain': B[1, 1] }), name='cloudy') # Creation etat beau temps et prob emission v_cloudy = pg.State(pg.DiscreteDistribution({ 'sun': B[2, 0], 'rain': B[2, 1] }),
for _, row in data.iterrows(): val = row['eeg'] # type: int state = row['doctor'] # type: str summary[state][val] += 1 for key in summary.keys(): total = sum(summary[key].values(), 0.0) summary[key] = {k: v / total for k, v in summary[key].items()} states = {} # type: Dict[str, pg.State] for name in state_names: dist = pg.DiscreteDistribution(summary[name]) states[name] = pg.State(dist, name=name) #counts transition probabilites chain_model = pg.MarkovChain.from_samples([state_sequence]) #creates empty hidden markov model with name 'prediction' model = pg.HiddenMarkovModel('prediction') #adds the states to the model model.add_states(list(states.values())) #sets the starting probability for state 'Wake' to 1.0 model.add_transition(model.start, states['Wake'], 1.0) #insert the emission probabilities, that we computed in summary for prob in chain_model.distributions[1].parameters[0]: state1 = states[prob[0]] state2 = states[prob[1]]
alpha]]) # Matrice de transition temporaire B = np.array([[1, 0], [0, 1]]) # matrice d'émission temporaire dicoObs = { 'sun': 0, 'rain': 1 } # pour transformer les chaines en entier (0 et 1) dicoState = {'sunny': 0, 'rainy': 1} ## Creation de la chaine de Markov model = pg.HiddenMarkovModel(name="partie 1") # Creation instance # Matrice d'emission sunny = pg.State(pg.DiscreteDistribution({ 'sun': B[0, 0], 'rain': B[0, 1] }), name='sunny') # Creation etat beau temps et prob emission rainy = pg.State(pg.DiscreteDistribution({ 'sun': B[1, 0], 'rain': B[1, 1], }), name='rainy') # Création de l'état pluie et prob emission # Matrice de transition model.add_transitions(model.start, [sunny, rainy], [gamma, 1 - gamma]) # Probs initiales model.add_transitions(sunny, [sunny, rainy], [T[0, 0], T[0, 1]]) # transitions depuis sunny model.add_transitions(rainy, [sunny, rainy], [T[1, 0], T[1, 1]]) # transition depuis rainy
def fit_transitions(self, X, gloss_seqs, **hmm_fit_args):
    """Fit one word model per label and assemble them into ``self.hmm``.

    For every label, the segments of ``gloss_seqs`` carrying that label are
    cut out of each element of ``X``, scored with ``self.posterior``,
    restricted to the state columns of that label, shifted by ``self.p_s``
    and passed to ``self._fit_word_model``. The returned edges are then
    copied into one model in which the last state is entered from the start
    and loops on itself with ``self.p_idle2idle``. Finally
    ``self.state2idx`` and ``self.state2label`` are built from the order of
    ``self.hmm.states``.

    :param X: sequence of per-modality collections indexed by sequence number
    :param gloss_seqs: per sequence, an iterable of ``(label, start, stop)``
    :param hmm_fit_args: forwarded to ``self._fit_word_model``
    """
    # Train individual word models
    params = []
    for label_idx, label in enumerate(self.labels):
        # Range of state indexes for this label
        first = sum(self.chains_lengths[:label_idx])
        last = sum(self.chains_lengths[:label_idx + 1])

        # Compute posteriors for the states of this label
        segments = [(seq, start, stop)
                    for seq, gloss_seq in enumerate(gloss_seqs)
                    for gloss, start, stop in gloss_seq
                    if gloss == label]
        per_modality = [[Xm[seq][start:stop] for seq, start, stop in segments]
                        for Xm in X]
        log_posteriors = [
            self.posterior.predict_logproba(*chunk)[:, first:last]
            for chunk in zip(*per_modality)
        ]
        scaled = [lp - self.p_s[None, first:last] for lp in log_posteriors]
        # scaled = [x - logsumexp(x, axis=1, keepdims=True) for x in scaled]

        # pseudo log-likelihoods
        params.append(
            self._fit_word_model(scaled, self.chains_lengths[label_idx],
                                 **hmm_fit_args))

    # Create complete model
    print("loading trained parameters into the model")
    self.hmm = pomegranate.HiddenMarkovModel(None)

    states = []
    for idx in range(self.nstates):
        state = pomegranate.State(PrecomputedDistribution(idx, self.nstates),
                                  name=str(idx))
        states.append(state)
        self.hmm.add_state(state)

    self.hmm.start.name = str(-1)
    self.hmm.end.name = str(self.nstates)

    idle = states[-1]
    self.hmm.add_transition(self.hmm.start, idle, 1)
    self.hmm.add_transition(idle, idle, self.p_idle2idle)

    for label_idx in range(self.nlabels):
        state_offset = np.sum(self.chains_lengths[:label_idx])
        chain_length = self.chains_lengths[label_idx]
        for source, target, prob in params[label_idx]:
            # Adjust indexes and parameters to integrate within full model:
            # the word model's end maps to the last state, and its start
            # (index -1) is that same last state with its own probability.
            if target == chain_length:
                target = -1
            else:
                target = target + state_offset
            if source == -1:
                prob = self.p_idle2gesture
            else:
                source += state_offset
            self.hmm.add_transition(states[source], states[target], prob)

    self.hmm.bake()

    # Build mapping between internal indexes and ours
    boundary_names = {"-1", str(self.nstates)}
    self.state2idx = np.array(
        [int(s.name) for s in self.hmm.states
         if s.name not in boundary_names],
        dtype=np.int32)

    label_blocks = [
        np.full((self.chains_lengths[i], ), self.labels[i])
        for i in range(self.nlabels)
    ]
    idx2labels = np.concatenate(
        label_blocks + [np.array([0.0])]).astype(np.int32)

    boundary_indexes = {-1, self.nstates}
    self.state2label = np.array([
        idx2labels[int(s.name)] for s in self.hmm.states
        if int(s.name) not in boundary_indexes
    ])
def _redistribute(mass, first, second):
    """Add *mass* to *first* and *second* in proportion to their sizes.

    Both shares are computed from the values passed in, so exactly *mass*
    is added in total. Raises ZeroDivisionError if both values are zero.
    """
    total = first + second
    return first + (first / total) * mass, second + (second / total) * mass


def get_model(r, params, window_size, num_skipped, seq_len, p,
              g, resample_prob, x_chr=False, haploid=False, debug=False,
              h_t=1, skip_score=float("-Inf")):
    """
    Builds the hidden Markov model for a given chromosome or scaffold, using
    the Pomegranate module.

    Every ancestry state (AA, AB, BB) has a companion "skip" state that emits
    skip_score and shares its outgoing ancestry transitions. The probability
    of entering a skip state and the probability of ending are both capped at
    the module-level ``prob_lim``, and the capped values are used for every
    edge so that the outgoing probabilities of each state sum to one.

    Arguments:
        r -- (float) the per site, per generation recombination probability
        params -- a dict where keys are names of states (AA, AB, and BB) and
            values are dicts where values are mu and sd, which are floats
            representing means and standard deviations of emission
            probability distributions
        window_size -- (int) the window size for this run, in bp
        num_skipped -- (int) the number of windows that were skipped due to
            not passing criteria
        seq_len -- (int) the number of windows in the current
            chromosome/scaffold
        p -- (float) the percent ancestry the admixed population derives from
            ancestral population A (estimated beforehand)
        g -- (int) the number of generations since admixture (estimated
            beforehand)
        resample_prob -- (float) probability of resampling the same ancestral
            recombination event twice in an individual after the set number
            of generations since admixture (referred to as z in the paper)
        x_chr -- (boolean) does this chromosome/scaffold belong to a
            hemizygous sex chromosome?
        haploid -- (boolean) is this individual haploid along this
            chromosome/scaffold?
        debug -- (boolean) should debugging messages be printed to the
            screen?
        h_t -- (float) if the user has specified that expected reduction in
            heterozygosity given the number of generations since admixture
            should be incorporated into the model, this is the expected
            fraction of the initial heterozygosity that remains after g
            generations.
        skip_score -- (float) the number emitted by adlibs_score when
            "skipped" windows are encountered
    Returns:
        a Pomegranate HMM object for the current chromosome/scaffold
    """
    global prob_lim

    model = pomegranate.HiddenMarkovModel(name='ancestry')

    # Compute probabilities of transitioning to a skip state or the end. Cap
    # these both at the specified probability limit.
    skip_prob = min(num_skipped / seq_len, prob_lim)
    state_end = min(1 / seq_len, prob_lim)

    if x_chr:
        r *= (2 / 3)

    # Determine probabilities of transitions
    if haploid:
        # Should 2 be 1.5? I don't think so -- we already multiplied r by
        # (2/3) so that's in here already.
        aa_bb = g * r * (1 - p)
        bb_aa = g * r * p
        # Eliminate the heterozygous state.
        aa_ab = 0
        ab_aa = 0
        bb_ab = 0
        ab_bb = 0
    else:
        probs = get_trans_probs(r, g, p, resample_prob)
        aa_ab = probs['aa_ab']
        ab_aa = probs['ab_aa']
        aa_bb = probs['aa_bb']
        bb_ab = probs['bb_ab']
        ab_bb = probs['ab_bb']
        bb_aa = probs['bb_aa']

    # Scale the per-site probabilities to the window size.
    aa_ab *= window_size
    ab_aa *= window_size
    aa_bb *= window_size
    bb_ab *= window_size
    ab_bb *= window_size
    bb_aa *= window_size

    # Self-transitions take whatever probability is left.
    aa_aa = 1 - (aa_ab + aa_bb + state_end + skip_prob)
    ab_ab = 1 - (ab_aa + ab_bb + state_end + skip_prob)
    bb_bb = 1 - (bb_aa + bb_ab + state_end + skip_prob)

    # Account for reduction in heterozygosity due to genetic drift: only the
    # fraction h_t of the probability of entering (or staying in) AB is kept;
    # the rest is handed to the two homozygous alternatives in proportion to
    # their current probabilities. With h_t == 1 nothing moves, so the step
    # (and its divisions) is skipped entirely.
    if not haploid and h_t != 1:
        aa_aa, aa_bb = _redistribute(aa_ab - aa_ab * h_t, aa_aa, aa_bb)
        bb_aa, bb_bb = _redistribute(bb_ab - bb_ab * h_t, bb_aa, bb_bb)
        aa_ab *= h_t
        bb_ab *= h_t
        ab_aa, ab_bb = _redistribute(ab_ab - ab_ab * h_t, ab_aa, ab_bb)
        ab_ab *= h_t

    if debug:
        for template, value in (
                ("# AA -> AA {}", aa_aa),
                ("# AA -> AB {}", aa_ab),
                ("# AA -> BB {}", aa_bb),
                ("# AB -> AA {}", ab_aa),
                ("# AB -> AB {}", ab_ab),
                ("# AB -> BB {}", ab_bb),
                ("# BB -> AA {}", bb_aa),
                ("# BB -> AB {}", bb_ab),
                ("# BB -> BB {}", bb_bb),
                ("# SKIP {}", skip_prob)):
            print(template.format(value), file=sys.stderr)

    aaDist = pomegranate.NormalDistribution(params['AA']['mu'],
                                            params['AA']['sd'])
    abDist = pomegranate.NormalDistribution(params['AB']['mu'],
                                            params['AB']['sd'])
    bbDist = pomegranate.NormalDistribution(params['BB']['mu'],
                                            params['BB']['sd'])

    aaState = pomegranate.State(aaDist, name="AA")
    abState = pomegranate.State(abDist, name="AB")
    bbState = pomegranate.State(bbDist, name="BB")

    model.add_state(aaState)
    if not haploid:
        model.add_state(abState)
    model.add_state(bbState)

    #### ADD skip states
    # Narrow uniform distribution ending at skip_score, shared by all three
    # skip states.
    skip_dist = pomegranate.UniformDistribution(skip_score - 0.01, skip_score)
    aa_skip_state = pomegranate.State(skip_dist, name="skip-AA")
    ab_skip_state = pomegranate.State(skip_dist, name="skip-AB")
    bb_skip_state = pomegranate.State(skip_dist, name="skip-BB")

    model.add_state(aa_skip_state)
    if not haploid:
        model.add_state(ab_skip_state)
    model.add_state(bb_skip_state)

    # Start transitions: p for haploids, p**2 / 2p(1-p) / (1-p)**2 otherwise,
    # each split between the state and its skip twin.
    if haploid:
        model.add_transition(model.start, aaState, p * (1 - skip_prob))
        model.add_transition(model.start, aa_skip_state, p * skip_prob)
        model.add_transition(model.start, bbState, (1 - p) * (1 - skip_prob))
        model.add_transition(model.start, bb_skip_state, (1 - p) * skip_prob)
    else:
        model.add_transition(model.start, aaState, p**2 * (1 - skip_prob))
        model.add_transition(model.start, aa_skip_state, p**2 * skip_prob)
        model.add_transition(model.start, abState,
                             2 * p * (1 - p) * (1 - skip_prob))
        model.add_transition(model.start, ab_skip_state,
                             2 * p * (1 - p) * skip_prob)
        model.add_transition(model.start, bbState,
                             (1 - p)**2 * (1 - skip_prob))
        model.add_transition(model.start, bb_skip_state,
                             (1 - p)**2 * skip_prob)

    # End transitions use the capped state_end, matching the value that was
    # subtracted when the self-transitions were computed above.
    model.add_transition(aaState, model.end, state_end)
    if not haploid:
        model.add_transition(abState, model.end, state_end)
    model.add_transition(bbState, model.end, state_end)

    model.add_transition(aaState, bbState, aa_bb)
    model.add_transition(aaState, aaState, aa_aa)
    model.add_transition(bbState, aaState, bb_aa)
    model.add_transition(bbState, bbState, bb_bb)

    if not haploid:
        model.add_transition(aaState, abState, aa_ab)
        model.add_transition(abState, aaState, ab_aa)
        model.add_transition(abState, bbState, ab_bb)
        model.add_transition(abState, abState, ab_ab)
        model.add_transition(bbState, abState, bb_ab)

    ### Add skip state transitions
    model.add_transition(aaState, aa_skip_state, skip_prob)
    if not haploid:
        model.add_transition(abState, ab_skip_state, skip_prob)
    model.add_transition(bbState, bb_skip_state, skip_prob)

    model.add_transition(aa_skip_state, aa_skip_state, skip_prob)
    if not haploid:
        model.add_transition(ab_skip_state, ab_skip_state, skip_prob)
    model.add_transition(bb_skip_state, bb_skip_state, skip_prob)

    model.add_transition(aa_skip_state, bbState, aa_bb)
    model.add_transition(bb_skip_state, aaState, bb_aa)

    if not haploid:
        model.add_transition(aa_skip_state, abState, aa_ab)
        model.add_transition(ab_skip_state, aaState, ab_aa)
        model.add_transition(ab_skip_state, bbState, ab_bb)
        model.add_transition(bb_skip_state, abState, bb_ab)

    model.add_transition(aa_skip_state, model.end, state_end)
    if not haploid:
        model.add_transition(ab_skip_state, model.end, state_end)
    model.add_transition(bb_skip_state, model.end, state_end)

    # Leaving a skip state for its own ancestry state takes the remainder.
    model.add_transition(aa_skip_state, aaState,
                         1 - skip_prob - aa_ab - aa_bb - state_end)
    if not haploid:
        model.add_transition(ab_skip_state, abState,
                             1 - skip_prob - ab_aa - ab_bb - state_end)
    model.add_transition(bb_skip_state, bbState,
                         1 - skip_prob - bb_aa - bb_ab - state_end)
    ###

    model.bake()

    return model
# Initial state distribution (kept for reference; the start transitions
# below spell out the same values explicitly).
start_probability = np.array([1, 0, 0])

# Temporary transition matrix: row i holds the probabilities of leaving
# state P(i+1) towards P1, P2 and P3.
T = np.array([
    [0.5, 0.4, 0.1],
    [0.3, 0.4, 0.3],
    [0.1, 0.2, 0.7],
])

# Temporary emission matrix: one row per state, columns are 'pile', 'face'.
B = np.array([
    [0.5, 0.5],
    [0.25, 0.75],
    [0.75, 0.25],
])

# Lookup tables turning observation and state names into integer codes.
dicoObs = {'pile': 0, 'face': 1}
dicoState = {'P1': 0, 'P2': 1, 'P3': 2}

## Build the hidden Markov model
model = pg.HiddenMarkovModel(name="partie 5")

# Emissions: one state per row of B.
p1 = pg.State(pg.DiscreteDistribution(dict(zip(('pile', 'face'), B[0]))),
              name='P1')
p2 = pg.State(pg.DiscreteDistribution(dict(zip(('pile', 'face'), B[1]))),
              name='P2')
p3 = pg.State(pg.DiscreteDistribution(dict(zip(('pile', 'face'), B[2]))),
              name='P3')

# Transitions: initial probabilities first, then one row of T per state.
model.add_transitions(model.start, [p1, p2, p3], [1, 0, 0])
model.add_transitions(p1, [p1, p2, p3], list(T[0]))  # leaving P1
model.add_transitions(p2, [p1, p2, p3], list(T[1]))  # leaving P2
model.add_transitions(p3, [p1, p2, p3], list(T[2]))  # leaving P3
def decode_sequence(probs=None, algorithm='threshold',
                    params=dict(n=5, t=.8), verbose=True):
    '''
    Once a model outputs probabilities for some sequence of data, that data
    shall be passed to this method. This method will use various ways to
    decode an underlying sequence in order to determine where the *actual*
    canned laughter was.

    implemented algorithms:
        - 'threshold' window and threshold method: a simple heuristic that
            observes windows of length n, and if the average probability of
            any single class is at least t, assigns that class to all of the
            samples in that window. imagine a threshold of 0.9, then it is
            intuitively likely if few of the samples are labeled with some
            other class, they may have been accidentally so-labeled.
            params: dict(n=<window length>, t=<threshold>)

        - 'hmm' (alias 'viterbi'): uses a hidden Markov model with underlying
            states that are the same as surface states (the two state spaces
            for hidden and observed are equivalent) and Viterbi to decode the
            underlying state sequence.
            params may map a class index c (0, 1, 2, ...) to the `pomegranate`
            DiscreteDistribution used as emission of that class; classes 0
            and 1 have built-in defaults. params['transitions'], if present,
            is passed to the model's add_transitions as is.

    described but not implemented here (raise NotImplementedError):
        - 'neural' surround-n-gram neural network: would label sample i from
            the multiclass probabilities of samples [i-n, ..., i, ..., i+n]
            with a pretrained Keras model.

        - 'modethreshold': like 'threshold' but based on the percentage of
            labels of a class in the window instead of the avg probability.

    ---
    probs: an nparray of (n_samples, n_classes) probabilities such that
            foreach sample, the sum of probabilities across classes adds up
            to 1. An array of shape (n_samples,) or (n_samples, 1) is
            converted to multiclass using this module's
            _binary_probs_to_multiclass method

    algorithm: name of the decoding algorithm, see above

    params: algorithm-specific settings, see above (only read, never
            modified)

    verbose: currently unused

    return: for 'threshold', a list of len n_samples with the predicted
            label of each sample; for 'hmm', the Viterbi prediction of the
            model with its first and last entries removed.
    '''
    color.INFO('INFO', 'shape of input probs is: {}'.format(probs.shape))

    # A flat (n_samples,) vector is promised to work: turn it into the
    # (n_samples, 1) column form that the conversion below already handles.
    if len(probs.shape) == 1:
        probs = probs.reshape(-1, 1)
    if probs.shape[-1] == 1:
        probs = _binary_probs_to_multiclass(probs)

    color.INFO('INFO', 'received probs of shape {}'.format(str(probs.shape)))

    if algorithm == 'threshold':
        n, t = params['n'], params['t']
        n_classes = probs.shape[-1]

        # Start from the per-sample argmax, then overwrite whole windows.
        labels = [np.argmax(timechunk) for timechunk in probs]

        for i in range(len(probs) - n + 1):
            # The window average does not depend on the class: compute once.
            window_avg = np.average(probs[i:i + n], axis=0)
            for c in range(n_classes):
                if window_avg[c] >= t:
                    labels[i:i + n] = [c] * n

        return labels

    elif algorithm in ('hmm', 'viterbi'):
        # define default emission probabilities
        default = {
            0: pmgt.DiscreteDistribution({'0': 0.7, '1': 0.3}),
            1: pmgt.DiscreteDistribution({'0': 0.2, '1': 0.8}),
        }

        states = []
        for c in range(probs.shape[-1]):
            # Only fall back to the defaults when the caller gave no emission
            # for this class; an eager default[c] lookup would raise KeyError
            # for a third class even when params supplies it.
            emission = params[c] if c in params else default[c]
            states.append(pmgt.State(emission, name=str(c)))

        model = pmgt.HiddenMarkovModel('laugh-decoder')
        model.add_states(states)

        if 'transitions' in params:
            model.add_transitions(params['transitions'])
        else:
            # start must always go to state 0
            model.add_transitions([model.start, states[0]],
                                  [states[0], model.end], [1., .1])
            model.add_transitions(
                [states[0], states[0], states[1], states[1]],
                [states[0], states[1], states[0], states[1]],
                [.5, .4, .2, .8])
        model.bake()

        # The observations fed to the HMM are the per-sample argmax labels,
        # as strings to match the emission keys.
        labels = [str(np.argmax(entry)) for entry in probs]
        labels = model.predict(sequence=labels, algorithm='viterbi')
        return labels[1:-1]

    else:
        raise NotImplementedError(
            'unsupported decoding algorithm: {!r}'.format(algorithm))