def add_new_state_node(self, t):
    if len(self.variables["states"]) > t:
        # the state already exists in the graph.
        return
    else:
        if t == 0:
            P = self.get_prior_factor()
            self.factors["transition"].append(pm.DiscreteDistribution(P))  # add prior factor
            self.variables["states"].append(pm.State(
                self.factors["transition"][t], name="Damage {}".format(t)))
            self.add_node(self.variables["states"][t])
        else:
            T = self.get_transition_factor()
            # check if we have a controlA
            if len(self.variables["controlAs"]) > t - 1:
                self.factors["transition"].append(pm.ConditionalProbabilityTable(
                    T, [self.factors["transition"][t - 1], self.factors["controlA"][t - 1]]))
            else:
                self.factors["transition"].append(pm.ConditionalProbabilityTable(
                    T, [self.factors["transition"][t - 1], self.factors["controlP"][t - 1]]))
            # add RV as a node in the graph
            self.variables["states"].append(pm.State(
                self.factors["transition"][t], name="Damage {}".format(t)))
            self.add_node(self.variables["states"][t])
            # connect node via transition edge
            self.add_edge(self.variables["states"][t - 1], self.variables["states"][t])
            # connect node via control edge
            if len(self.variables["controlAs"]) > t - 1:
                self.add_edge(self.variables["controlAs"][t - 1], self.variables["states"][t])
            else:
                self.add_edge(self.variables["controlPs"][t - 1], self.variables["states"][t])
def hmm(nstates=2, bias=0.1):
    def make_bias(i, s):
        if i == 0:
            return [bias, 1 - bias][s]
        else:
            return [1 - bias, bias][s]

    states = [
        pmg.State(pmg.DiscreteDistribution({
            0: make_bias(i, 0),
            1: make_bias(i, 1)
        }), name='S%d' % i) for i in range(nstates)
    ]
    # trans = np.ones((nstates, nstates)) / nstates
    trans = np.random.rand(nstates, nstates)
    for i in range(nstates):
        trans[i] = trans[i] / trans[i].sum()
    model = pmg.HiddenMarkovModel()
    model.add_states(states)
    for i in range(nstates):
        for j in range(nstates):
            model.add_transition(states[i], states[j], trans[i, j])
        model.add_transition(model.start, states[i], 1.0 / nstates)
    model.bake()
    return model
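# Usage sketch for the factory above (assumes numpy as np and pomegranate as pmg are
# imported as in the surrounding code; `m` and `seq` are illustrative names only).
m = hmm(nstates=2, bias=0.1)
seq = m.sample(length=20)        # one observation sequence of length 20
print(m.log_probability(seq))    # log-likelihood of that sequence under the model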
def fit(self, similarBldDF):
    '''
    Climate zone, design cooling load and principle building activity are parent
    attributes of main cooling equipment. Census division, main cooling equipment,
    cooling degree days and percentage of building cooled are the four parent
    attributes of the high-efficiency building.

    Attributes:
        similarBldDF: a pandas DataFrame that includes a group of buildings similar
        to the proposed building. This object is used to train the Bayesian
        network classifier.
    '''
    (climateZoneDict, COOLLOADDict, principleActivityNDict, MAINCLCPTList,
     MAINCLDict, CDD65NDict, HECSCPTList) = self.attributeDistribution(similarBldDF)
    climateZone = pm.DiscreteDistribution(climateZoneDict)
    designCoolingLoad = pm.DiscreteDistribution(COOLLOADDict)
    principleBuildingActivity = pm.DiscreteDistribution(principleActivityNDict)
    coolingDegreeDays = pm.DiscreteDistribution(CDD65NDict)
    # MCE_CPT is the conditional probability table of main cooling equipment
    mainCoolingEquipmentCPT = pm.ConditionalProbabilityTable(
        MAINCLCPTList, [climateZone, designCoolingLoad, principleBuildingActivity])
    # HECS_CPT is the conditional probability table of the high-efficiency cooling system
    highEfficientCoolingSystemCPT = pm.ConditionalProbabilityTable(
        HECSCPTList, [mainCoolingEquipmentCPT, principleBuildingActivity, coolingDegreeDays])
    # the first-layer parent attributes
    p1_climateZone = pm.Node(climateZone, name="climateZone")
    p1_COOLLOAD = pm.Node(designCoolingLoad, name="COOLLOAD")
    p1_principleActivity = pm.Node(principleBuildingActivity, name="principleActivity")
    # the second-layer parent attributes
    # the main cooling equipment
    p2_MAINCL = pm.Node(mainCoolingEquipmentCPT, name="MAINCL")
    p2_CDD65 = pm.Node(coolingDegreeDays, name="CDD65")
    # high-efficiency cooling system
    p_HECS = pm.Node(highEfficientCoolingSystemCPT, name="highEfficientCoolingSystemCPT")
    # the Bayesian network for the main cooling equipment
    modelMCE = pm.BayesianNetwork("Main cooling equipment")
    modelMCE.add_nodes(p1_climateZone, p1_COOLLOAD, p1_principleActivity,
                       p2_MAINCL, p2_CDD65, p_HECS)
    modelMCE.add_edge(p1_climateZone, p2_MAINCL)
    modelMCE.add_edge(p1_COOLLOAD, p2_MAINCL)
    modelMCE.add_edge(p1_principleActivity, p2_MAINCL)
    modelMCE.add_edge(p2_MAINCL, p_HECS)
    modelMCE.add_edge(p1_principleActivity, p_HECS)
    modelMCE.add_edge(p2_CDD65, p_HECS)
    modelMCE.bake()
    self.BN4CLfitted = modelMCE
def create_independent_dist(feature, seed):
    unique_val = np.unique(feature)
    init_dict = {}
    random_state = check_random_state(seed)
    init_prob = random_state.rand(len(unique_val), 1)
    init_prob = init_prob / init_prob.sum()
    for idx, i in enumerate(unique_val):
        init_dict[int(i)] = init_prob[idx].item()
    return pom.DiscreteDistribution(init_dict)
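# Illustrative call (a sketch; assumes numpy as np is available). `feature` is a 1-D
# array of integer-coded categories; the resulting probabilities are random but sum
# to 1, which makes the distribution a usable random initialisation.
feature = np.array([0, 1, 1, 2, 2, 2])
dist = create_independent_dist(feature, seed=0)
print(dist.parameters[0])        # e.g. {0: ..., 1: ..., 2: ...}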
def __create_direction_distribution(self):
    direction_dict = {}
    max_prob_value = 0
    for i in range(action.Action.num_actions):
        if self.__direction_probs[i] > max_prob_value:
            self.__max_prob_dir = i
            max_prob_value = self.__direction_probs[i]
        direction_dict[action.Action.action2string[i]] = self.__direction_probs[i]
    self.__direction_dist = pm.DiscreteDistribution(direction_dict)
def probparam(dfent, typ, hdeb, hfin, mot):
    # Why not also take a name parameter and the dict, so the model can be added to the dict
    df = dfent.copy()
    df = df[(df['V2_MORIHDEP'] <= pd.to_timedelta(hfin, unit='h'))
            & (df['V2_MORIHDEP'] >= pd.to_timedelta(hdeb, unit='h'))
            & (np.floor(df['V2_MMOTIFDES']) == mot)
            & (df['TYPE_JOUR'] == typ)]
    prob_dist = pg.LogNormalDistribution(0, 1)
    prob_dist.fit(df['V2_MDISTTOT'].values.flatten(),
                  weights=df['POIDS_JOUR'].values.flatten())
    pg_mu, pg_sigma = prob_dist.parameters
    # quartiles of the fitted lognormal (pg_mu, pg_sigma are the log-space mean and std)
    Q = [
        np.exp(pg_mu + np.sqrt(2) * pg_sigma * erfinv(2 * x - 1))
        for x in [0.25, 0.5, 0.75]
    ]
    Q.append(100)
    Q.insert(0, 0)
    prob_quartdist = {}
    for i in range(0, len(Q) - 1):
        df_dist = df[(df['V2_MDISTTOT'] <= Q[i + 1])
                     & (df['V2_MDISTTOT'] > Q[i])]
        df_dur = df_dist.copy()
        df_dur['V2_DUREE'] = pd.to_timedelta(
            df_dur['V2_DUREE'],
            unit='Min').dt.round('1Min').dt.total_seconds() // 60
        prob_dist_dur = pg.LogNormalDistribution.from_samples(
            df_dur['V2_DUREE'].values.flatten(),
            weights=df_dur['POIDS_JOUR'].values.flatten())
        # df_dur = df_dur.groupby(by=['V2_DUREE'], as_index=False)['POIDS_JOUR'].sum()
        # plt.plot(df_dur['V2_DUREE'].values.flatten(),
        #          df_dur['POIDS_JOUR'].values.flatten() / (df_dur['POIDS_JOUR'].sum()),
        #          df_dur['V2_DUREE'].values.flatten(),
        #          prob_dist_dur.probability(df_dur['V2_DUREE'].values.flatten()))
        # plt.show()
        del df_dur
        df_hdep = df_dist.copy()
        df_hdep['V2_MORIHDEP'] = df_hdep['V2_MORIHDEP'].dt.round('15Min')
        df_hdep = df_hdep.groupby(by=['V2_MORIHDEP'],
                                  as_index=False)['POIDS_JOUR'].sum()
        df_hdep = df_hdep.sort_values(['V2_MORIHDEP'])
        prob_dist_hdep = pg.DiscreteDistribution(
            dict(
                zip(
                    df_hdep['V2_MORIHDEP'],
                    df_hdep['POIDS_JOUR'].values.flatten() /
                    (df_hdep['POIDS_JOUR'].sum()))))
        # prob_test = pg.GaussianKernelDensity()
        # prob_test.fit(df_dist['V2_MORIHDEP'].dt.total_seconds() / 60)
        # n = plt.hist(df_dist['V2_MORIHDEP'].dt.total_seconds() / 60, density=True,
        #              bins=(hfin - hdeb) * 4)
        # plt.plot(n[1], prob_test.probability(n[1]))
        # plt.show()
        # print(df_dist['V2_MORIHDEP'].count())
        prob_quartdist[(Q[i], Q[i + 1])] = (prob_dist_dur, prob_dist_hdep)
    return (prob_dist, prob_quartdist)
def generate_model(state, transition):
    # Setup hmm
    model = pomegranate.HiddenMarkovModel()
    A = pomegranate.State(pomegranate.DiscreteDistribution({'A': state, 'B': 1 - state}), name='A')
    B = pomegranate.State(pomegranate.DiscreteDistribution({'A': 1 - state, 'B': state}), name='B')
    model.add_transition(model.start, A, 0.5)
    model.add_transition(model.start, B, 0.5)
    model.add_transition(A, A, 1 - transition)
    model.add_transition(A, B, transition)
    model.add_transition(B, A, transition)
    model.add_transition(B, B, 1 - transition)
    model.add_transition(A, model.end, 0.5)
    model.add_transition(B, model.end, 0.5)
    model.bake(verbose=False)
    return model
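# Quick check of the two-state model above (a sketch; the observation string and
# parameter values are arbitrary examples).
m = generate_model(state=0.9, transition=0.1)
logp, path = m.viterbi(list('AAABBBAA'))
print([s.name for _, s in path])   # most likely state sequence, incl. start/end states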
def run():
    # Load dataset
    path = 'datasets/'
    with open(path + datasetload, 'rb') as f:
        a = pickle.load(f)
    X = a[0]
    X = X.astype(int)

    # Create HMM
    D = bond_dimension
    N = X.shape[1]
    d = np.max(X + 1)
    list_of_states = []
    for i in xrange(N):
        list_of_states.append([])
        for u in xrange(bond_dimension):
            dictionnary = dict()
            for l in xrange(d):
                dictionnary[str(l)] = np.random.rand()
            list_of_states[i].append(
                pomegranate.State(
                    pomegranate.DiscreteDistribution(dictionnary)))
    model = pomegranate.HiddenMarkovModel()
    for i in xrange(N - 1):
        for d in xrange(D):
            for d2 in xrange(D):
                model.add_transition(list_of_states[i][d],
                                     list_of_states[i + 1][d2],
                                     np.random.rand())
    for d in xrange(D):
        model.add_transition(model.start, list_of_states[0][d], np.random.rand())
    for d in xrange(D):
        model.add_transition(list_of_states[N - 1][d], model.end, np.random.rand())
    model.bake()

    # Train HMM
    begin = time.time()
    sequencetrain = [[str(i) for i in v] for v in X]
    np.random.seed()
    model.fit(sequencetrain, algorithm='baum-welch', stop_threshold=1e-50,
              min_iterations=1000, max_iterations=n_iter)
    u = 0
    for i in sequencetrain:
        u += model.log_probability(i)
    accuracy = -u / len(sequencetrain)
    time_elapsed = time.time() - begin
    print("Negative log likelihood = %.3f" % (accuracy))
    print("Time elapsed = %.2fs" % (time_elapsed))
def make_hmm_model(emission_mat, transition_probs):
    model = pomegranate.HiddenMarkovModel('ndf')
    ictal_emissions = {i: emission_mat[1, i] for i in range(emission_mat.shape[1])}
    baseline_emissions = {i: emission_mat[0, i] for i in range(emission_mat.shape[1])}
    ictal = pomegranate.State(pomegranate.DiscreteDistribution(ictal_emissions), name='1')
    baseline = pomegranate.State(pomegranate.DiscreteDistribution(baseline_emissions), name='0')
    model.add_state(ictal)
    model.add_state(baseline)
    model.add_transition(model.start, ictal, 0.05)
    model.add_transition(model.start, baseline, 0.95)
    model.add_transition(baseline, baseline, transition_probs[0, 0])
    model.add_transition(baseline, ictal, transition_probs[0, 1])
    model.add_transition(ictal, ictal, transition_probs[1, 1])
    model.add_transition(ictal, baseline, transition_probs[1, 0])
    model.bake(verbose=False)
    return model
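# Sketch of how this factory might be driven (the matrices below are made-up
# placeholders, not values from the original project; assumes numpy as np).
emission_mat = np.array([[0.7, 0.2, 0.1],    # baseline emissions over 3 symbols
                         [0.1, 0.3, 0.6]])   # ictal emissions over the same symbols
transition_probs = np.array([[0.99, 0.01],
                             [0.05, 0.95]])
m = make_hmm_model(emission_mat, transition_probs)
print(m.predict([0, 0, 2, 2, 2, 1, 0]))      # decoded hidden-state indices for a toy input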
def add_new_controlA_node(self, t, control):
    # 'Actual' or enacted control node
    C = self.get_controlA_factor(control)
    self.factors["controlA"].append(pm.DiscreteDistribution(C))
    self.variables["controlAs"].append(pm.State(self.factors["controlA"][t],
                                                name="ControlA {}".format(t)))
    # add RV as a node in the graph
    self.add_node(self.variables["controlAs"][t])
    if len(self.variables["states"]) > t + 1:
        T = self.get_transition_factor()
        self.factors["transition"][t + 1].__init__(
            T, [self.factors["transition"][t], self.factors["controlA"][t]])
        self.variables["states"][t + 1].__init__(self.factors["transition"][t + 1],
                                                 name="Damage {}".format(t + 1))
        for idx, (a, b) in enumerate(self.edges):
            if a.name == "ControlP {}".format(t) and b.name == "Damage {}".format(t + 1):
                self.edges[idx] = (self.variables["controlAs"][t],
                                   self.variables["states"][t + 1])
def probDiagnose(percList):
    """
    Receives a list of three tuples, each containing a category and its
    percentage, and, using a Bayesian network and conditional probability,
    returns a list with the probabilities of the diagnosis categories
    conditioned on the symptom categories.

    Parameters
    ----------
    percList: list
        List of three tuples: each tuple contains a category and its
        percentage (for the symptoms)

    Returns
    -------
    condProbList: list
        List of three tuples: each tuple contains a category and its
        probability (for the diagnoses)
    """
    import pomegranate as pg
    sym = pg.DiscreteDistribution({
        'gen': 192. / 389,
        'sup': 125. / 389,
        'inf': 72. / 389
    })
    diagn = pg.ConditionalProbabilityTable(
        [['gen', 'gen', 0.5], ['gen', 'sup', 0.25], ['gen', 'inf', 0.25],
         ['sup', 'gen', 0.20], ['sup', 'sup', 0.75], ['sup', 'inf', 0.05],
         ['inf', 'gen', 0.2], ['inf', 'sup', 0.05], ['inf', 'inf', 0.75]],
        [sym])
    s1 = pg.State(sym, name="sym")
    s2 = pg.State(diagn, name="diagn")
    model = pg.BayesianNetwork("Diagnose finder")
    model.add_states(s1, s2)
    model.add_edge(s1, s2)
    model.bake()
    condProbList = []
    for i in percList:
        beliefs1 = model.predict_proba({'sym': i[1]})
        condProbList.append(beliefs1[1].parameters[0])
    return condProbList
def _solve_bayes_network(cpts, conditionals=None):
    print(f'cpts: {cpts}')
    print(f'conditionals: {conditionals}')
    model = pmg.BayesianNetwork("User Produced Model")
    states = []
    distributions = []
    cond = []
    _cond_stage = []

    def _translator(string):
        if string == 0 or string == '0':
            return 'True'
        elif string == 1 or string == '1':
            return 'False'
        else:
            return None

    counter = 0
    for i, name in enumerate(cpts.keys()):
        temp_dict = cpts[name].to_dict()
        if name not in conditionals:
            for k in temp_dict.keys():
                distributions.append(pmg.DiscreteDistribution(temp_dict[k]))
                states.append(pmg.State(distributions[counter], name=name))
                counter += 1
        else:
            _cond_stage.append(i)
            for col in temp_dict.keys():
                for val in temp_dict[col].keys():
                    arr = [_translator(col), val, temp_dict[col][val]]
                    cond.append(arr)
            print(f'cond: {cond}')
            states.append(
                pmg.State(pmg.ConditionalProbabilityTable(cond, distributions),
                          name=name))
    for i, s in enumerate(states):
        print(f'i: {i}')
        print(f's: {s}')
        model.add_states(s)
        if i not in _cond_stage and _cond_stage:
            model.add_edge(s, states[_cond_stage[0]])
    model.bake()
    return model
def _calculate_one(self) -> np.ndarray:
    """Run the calculation."""
    # Get the sequence of states
    dist_1 = pm.DiscreteDistribution({"wet": 0.5, "dry": 0.5})  # random starting point
    dist_2 = pm.ConditionalProbabilityTable(
        [
            ["wet", "wet", self.param["pi_1"]],
            ["wet", "dry", 1 - self.param["pi_1"]],
            ["dry", "wet", 1 - self.param["pi_2"]],
            ["dry", "dry", self.param["pi_2"]],
        ],
        [dist_1],
    )
    markov_chain = pm.MarkovChain([dist_1, dist_2])
    years = self._get_time("all")
    states = markov_chain.sample(years.size)

    # Get the conditional expected value
    mu_1_vec = self.param["mu_1"] + self.param["gamma_1"] * years
    mu_2_vec = self.param["mu_2"] + self.param["gamma_2"] * years
    mu_vec = mu_1_vec
    mu_vec[np.where(np.array(states) == "wet")] = mu_2_vec[
        np.where(np.array(states) == "wet")
    ]

    # Get the conditional variance
    sigma_vec = self.param["coeff_var"] * mu_vec
    sigma_vec[sigma_vec < self.param["sigma_min"]] = self.param["sigma_min"]

    # Get the streamflow
    sflow = np.exp(np.random.normal(loc=mu_vec, scale=sigma_vec))
    return sflow
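# Standalone illustration of the same wet/dry Markov chain construction (a sketch;
# the persistence probabilities 0.8 and 0.7 are examples standing in for self.param).
import pomegranate as pm

d1 = pm.DiscreteDistribution({"wet": 0.5, "dry": 0.5})
d2 = pm.ConditionalProbabilityTable(
    [["wet", "wet", 0.8], ["wet", "dry", 0.2],
     ["dry", "wet", 0.3], ["dry", "dry", 0.7]], [d1])
chain = pm.MarkovChain([d1, d2])
print(chain.sample(10))   # e.g. ['wet', 'wet', 'dry', ...]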
def export_pom(net, by='index'):
    '''
    Returns
    -------
    pomegranate BN Model based on given DAG.

    Assumes my "sort" function correctly returns a list where children are always
    ranked higher than parents. If pomegranate is used to estimate model likelihood,
    all outcomes must be of the same data type: either all int or all string.
    '''
    s = topoSort(net.export_nds())
    model = pm.BayesianNetwork("DIY_GRN")
    # Convert top-level nodes to discrete distributions
    top = [i for i in s if len(i.par) == 0]
    topStates = {}
    for n in top:
        pr = n.cpt['Prob'].to_dict()
        if by == 'index':
            va = n.cpt[n.idx].to_dict()
        else:
            va = n.cpt[n.label].to_dict()
        dist = {}
        for v in va.keys():
            dist[va[v]] = pr[v]
        dist = pm.DiscreteDistribution(dist)
        if by == 'index':
            state = pm.Node(dist, name=str(n.idx))
            topStates[str(n.idx)] = state
        else:
            state = pm.Node(dist, name=str(n.label))
            topStates[str(n.label)] = state
        model.add_state(state)
    # Convert dependent nodes to conditional distributions
    dep = [i for i in s if len(i.par) != 0]
    depStates = {}
    for n in dep:
        # Convert float cpt outcome levels to integers if needed
        if isinstance(n.cpt.iloc[0, 0], np.int64):
            cpt = [fl(l) for l in n.cpt.values.tolist()]
        else:
            cpt = n.cpt.values.tolist()
        # Vector of IDs for each parent
        if by == 'index':
            par_id = [str(i.idx) for i in n.par]
        else:
            par_id = [str(i.label) for i in n.par]
        # Validate that all parents have been processed
        for p in par_id:
            if (not p in topStates.keys()) and (not p in depStates.keys()):
                print("Problem with parent:", p, "of node:", n.idx)
                return [topStates, depStates]
        par = [
            topStates[i] if i in topStates.keys() else depStates[i]
            for i in par_id if i in topStates.keys() or i in depStates.keys()
        ]
        cpt = pm.ConditionalProbabilityTable(cpt, [p.distribution for p in par])
        if by == 'index':
            state = pm.Node(cpt, name=str(n.idx))
            depStates[str(n.idx)] = state
        else:
            state = pm.Node(cpt, name=str(n.label))
            depStates[str(n.label)] = state
        # Add node to model
        model.add_state(state)
        # Add edges from parent to this node
        for p in par:
            model.add_edge(p, state)
    # Assemble and "bake" model
    model.bake()
    return (model)
T = np.array([[1 - beta, beta], [1 - alpha, alpha]])  # temporary transition matrix
B = np.array([[1, 0], [0, 1]])  # temporary emission matrix
dicoObs = {
    'sun': 0,
    'rain': 1
}  # to convert the strings to integers (0 and 1)
dicoState = {'sunny': 0, 'rainy': 1}

## Build the Markov chain
model = pg.HiddenMarkovModel(name="partie 1")  # create the instance

# Emission matrix
sunny = pg.State(pg.DiscreteDistribution({
    'sun': B[0, 0],
    'rain': B[0, 1]
}), name='sunny')  # sunny state and its emission probabilities
rainy = pg.State(pg.DiscreteDistribution({
    'sun': B[1, 0],
    'rain': B[1, 1],
}), name='rainy')  # rainy state and its emission probabilities

# Transition matrix
model.add_transitions(model.start, [sunny, rainy], [gamma, 1 - gamma])  # initial probabilities
model.add_transitions(sunny, [sunny, rainy], [T[0, 0], T[0, 1]])  # transitions from sunny
model.add_transitions(rainy, [sunny, rainy], [T[1, 0], T[1, 1]])  # transitions from rainy
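# A possible continuation of the snippet above (a sketch): bake the model and decode
# an observation sequence with Viterbi. `alpha`, `beta` and `gamma` are assumed to be
# defined earlier, as in the original script.
model.bake()
logp, path = model.viterbi(['sun', 'sun', 'rain', 'sun'])
print([s.name for _, s in path[1:]])   # e.g. ['sunny', 'sunny', 'rainy', 'sunny']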
               for name in state_names}  # type: Dict[str, Dict[int, float]]
    for _, row in data.iterrows():
        val = row['eeg']  # type: int
        state = row['doctor']  # type: str
        summary[state][val] += 1
    for key in summary.keys():
        total = sum(summary[key].values(), 0.0)
        summary[key] = {k: v / total for k, v in summary[key].items()}

    states = {}  # type: Dict[str, pg.State]
    for name in state_names:
        dist = pg.DiscreteDistribution(summary[name])
        states[name] = pg.State(dist, name=name)

    # counts transition probabilities
    chain_model = pg.MarkovChain.from_samples([state_sequence])

    # creates an empty hidden Markov model with the name 'prediction'
    model = pg.HiddenMarkovModel('prediction')
    # adds the states to the model
    model.add_states(list(states.values()))
    # sets the starting probability for state 'Wake' to 1.0
    model.add_transition(model.start, states['Wake'], 1.0)
    # insert the transition probabilities that we computed with the Markov chain
    for prob in chain_model.distributions[1].parameters[0]:
        state1 = states[prob[0]]
sf = 1 - cdf

# -----------------------------------------------------------------------------
# Three check systems
# -----------------------------------------------------------------------------

# Chain model
# A -> B -> C
n = 5
t = 4

ppd_A = {
    "S": sf[n, t - 2],
    "F": 1 - sf[n, t - 2],
}
dist_A = pm.DiscreteDistribution(ppd_A)

cpd_B_A = [
    ["S", "S", sf[n + 1, t - 1]],
    ["S", "F", 1 - sf[n + 1, t - 1]],
    ["F", "S", sf[n, t - 1]],
    ["F", "F", 1 - sf[n, t - 1]],
]
dist_B_A = pm.ConditionalProbabilityTable(cpd_B_A, [dist_A])

cpd_C_B = [
    ["S", "S", sf[n + 1, t]],
    ["S", "F", 1 - sf[n + 1, t]],
    ["F", "S", sf[n, t]],
    ["F", "F", 1 - sf[n, t]],
max_cdf = np.empty((20, s))
max_pmf = np.empty((20, s))
for n in range(1, 21):
    max_cdf[(n - 1), :] = cdf**n
    max_pmf[(n - 1), 0] = max_cdf[(n - 1), 0]
    max_pmf[(n - 1), 1:] = max_cdf[(n - 1), 1:] - max_cdf[(n - 1), :-1]
sf = np.column_stack([np.zeros(20), 1 - max_cdf])

# Chain model
# A -> B -> C
n = 6
t = s - 3

distA = pm.DiscreteDistribution({
    "F": max_cdf[n - 1, t],
    "S": 1 - max_cdf[n - 1, t]
})
cpd = [
    ["F", "F", max_cdf[n - 1, t]],
    ["F", "S", 1 - max_cdf[n - 1, t]],
    ["S", "F", max_cdf[n, t]],
    ["S", "S", 1 - max_cdf[n, t]],
]
distB_A = pm.ConditionalProbabilityTable(cpd, [distA])
distC_B = pm.ConditionalProbabilityTable(cpd, [distB_A])
# distD_C = pm.ConditionalProbabilityTable(cpd, [distC_B])

A = pm.Node(distA, name="A")
B = pm.Node(distB_A, name="B")
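# Possible continuation (a sketch, not the original code): wrap the last CPT in a
# node and assemble the A -> B -> C chain into a Bayesian network for querying.
C = pm.Node(distC_B, name="C")
chain = pm.BayesianNetwork("chain A->B->C")
chain.add_states(A, B, C)
chain.add_edge(A, B)
chain.add_edge(B, C)
chain.bake()
print(chain.predict_proba({"A": "F"}))   # posteriors over B and C given that A failed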
def modele_jours_type(df, df_gens):
    # Returns the models associated with the day type (home-work, home-work-leisure),
    # the number of leisure trips per day, when those leisure trips happen, etc.
    data_journee = pd.DataFrame()
    data_journee = df.copy()
    typjour = [name for name, f in fb.__dict__.items() if callable(f)]
    dic_tranchlois = {}
    dic_nblois = {}
    dic_parklois = {}
    dic_dureelois = {}
    dic_retourdom = {}
    data_journee.loc[:, 'V2_MORIHDEP'] = pd.to_timedelta(
        data_journee['V2_MORIHDEP'], errors='coerce')
    data_journee['MOTIF_HDEP'] = list(
        zip(data_journee['V2_MMOTIFDES'], data_journee['V2_MORIHDEP']))
    data_journee = data_journee.sort_values([
        'IDENT_IND', 'V2_MORIHDEP'
    ]).groupby(['IDENT_IND'])['MOTIF_HDEP'].apply(lambda x: list(x)).reset_index()
    nb_profil_mob = [
        len(data_journee[data_journee['MOTIF_HDEP'].map(f)]['MOTIF_HDEP'].values)
        for name, f in fb.__dict__.items() if callable(f)
    ]
    nb_profil_mob = [x / sum(nb_profil_mob) for x in nb_profil_mob]
    profil_mob = pg.DiscreteDistribution(
        dict(
            zip([name for name, f in fb.__dict__.items() if callable(f)],
                nb_profil_mob)))
    # print(profil_mob)
    data_typjour = pd.DataFrame()
    data_typjour = df.copy()
    data_typjour.loc[:, 'V2_MORIHDEP'] = pd.to_timedelta(
        data_typjour['V2_MORIHDEP'], errors='coerce')
    data_typjour['LIMOTIF'] = list(data_typjour['V2_MMOTIFDES'])
    data_typjour = data_typjour.sort_values([
        'IDENT_IND', 'V2_MORIHDEP'
    ]).groupby(['IDENT_IND'])['LIMOTIF'].apply(lambda x: list(x)).reset_index()
    # Compute the various parameters needed to model the days (number of leisure trips
    # per individual, day type, categorisation of leisure trips by time slot)
    data_typjour['NBLOIS'] = data_typjour['LIMOTIF'].apply(compter_lois)
    data_typjour['TYPE_JOUR'] = data_journee['MOTIF_HDEP'].apply(trier_jour)
    data_typjour['NBLOIS_MAT'] = data_journee['MOTIF_HDEP'].apply(compter_matin)
    data_typjour['NBLOIS_MIDI'] = data_journee['MOTIF_HDEP'].apply(compter_midi)
    data_typjour['NBLOIS_SOIR'] = data_journee['MOTIF_HDEP'].apply(compter_soir)
    data_typjour['NBLOIS_GRANDPARK'] = data_typjour['LIMOTIF'].apply(
        compter_grand_parking)
    data_typjour['NBLOIS_PETITPARK'] = data_typjour['LIMOTIF'].apply(
        compter_petit_parking)
    tranche_hor_lois = ['NBLOIS_MAT', 'NBLOIS_MIDI', 'NBLOIS_SOIR']
    park_lois = ['NBLOIS_GRANDPARK', 'NBLOIS_PETITPARK']
    data_typjour['RETDOM'] = data_typjour['LIMOTIF'].apply(retour_maison)
    # Code snippet to plot a histogram of how leisure trips are spread across the day,
    # by day type
    # data_typjour['LOIS_HIST'] = data_journee['MOTIF_HDEP'].apply(hist_lois)
    # arr_hist = data_typjour[data_typjour['TYPE_JOUR'] == 'domtravailloisirs']['LOIS_HIST'].values
    # hist_tot = np.zeros(24 * 2)
    # for x in arr_hist:
    #     hist_tot = hist_tot + x
    # plt.bar(np.arange(24 * 2), hist_tot / sum(hist_tot))
    # Compute the probabilities of leisure by time slot for each day type
    for typ in typjour:
        if "loisirs" in typ:
            nb_tranchlois = [
                data_typjour[data_typjour['TYPE_JOUR'] == typ][x].sum()
                for x in tranche_hor_lois
            ]
            # print(nb_tranchlois)
            nb_tranchlois = [x / sum(nb_tranchlois) for x in nb_tranchlois]
            dic_tranchlois[typ] = pg.DiscreteDistribution(
                dict(zip([x for x in tranche_hor_lois], nb_tranchlois)))
    # Compute the probabilities of the number of leisure trips for each day type
    for typ in typjour:
        if "loisirs" in typ:
            nb_lois = data_typjour[data_typjour['TYPE_JOUR'] ==
                                   typ]['NBLOIS'].value_counts(normalize=True)
            nb_lois = nb_lois.drop(nb_lois[nb_lois < 0.005].index)
            dic_nblois[typ] = pg.DiscreteDistribution(
                dict(zip(nb_lois.index, nb_lois.values / nb_lois.sum())))
            # print(dic_nblois[typ])
    # print(profil_lois_domlois)
    # Compute the proportion of leisure trips with a small or large car park for each
    # day type. The split is always 2/3 small car park, 1/3 large, whatever the day type
    for typ in typjour:
        if "loisirs" in typ:
            nb_parklois = [
                data_typjour[data_typjour['TYPE_JOUR'] == typ][x].sum()
                for x in park_lois
            ]
            nb_parklois = [x / sum(nb_parklois) for x in nb_parklois]
            dic_parklois[typ] = pg.DiscreteDistribution(
                dict(zip(['gpark', 'ppark'], nb_parklois)))
    # Compute the leisure duration as a function of its car park and the day type:
    # exponential for small car parks, lognormal for large car parks
    df_dureelois = df[['IDENT_IND', 'V2_DURACT', 'V2_MMOTIFDES']].merge(
        data_typjour[['IDENT_IND', 'TYPE_JOUR']],
        left_on='IDENT_IND',
        right_on='IDENT_IND',
        how='left')
    for i, typ in enumerate(typjour):
        if "loisirs" in typ:
            dic_dureelois[typ] = {}
            dur_ppark = df_dureelois[
                (df_dureelois['TYPE_JOUR'] == typ)
                & (df_dureelois['V2_MMOTIFDES'].apply(est_petit_parking))
                & (df_dureelois['V2_DURACT'] > 0)
                & (df_dureelois['V2_DURACT'] < 301)]['V2_DURACT']
            dur_gpark = df_dureelois[
                (df_dureelois['TYPE_JOUR'] == typ)
                & (df_dureelois['V2_MMOTIFDES'].apply(est_grand_parking))
                & (df_dureelois['V2_DURACT'] > 0)
                & (df_dureelois['V2_DURACT'] < 301)]['V2_DURACT']
            prob_gpark = pg.LogNormalDistribution(0, 1)
            prob_gpark.fit(dur_gpark.values.flatten())
            # plt.figure(2 * i)
            # n = plt.hist(dur_gpark.values, density=True, bins=60)
            # plt.plot(n[1], prob_gpark.probability(n[1]))
            dic_dureelois[typ]['gpark'] = prob_gpark
            prob_ppark = pg.ExponentialDistribution(1)
            prob_ppark.fit(dur_ppark.values.flatten())
            # plt.figure(2 * i + 1)
            # n = plt.hist(dur_ppark.values, density=True, bins=60)
            # plt.plot(n[1], prob_ppark.probability(n[1]))
            dic_dureelois[typ]['ppark'] = prob_ppark
    # data_typjour = data_typjour.merge(df_gens[['IDENT_IND', 'V1_BTRAVT', 'V1_BTRAVHS', 'SITUA']],
    #                                   left_on='IDENT_IND', right_on='IDENT_IND', how='left')
    df_sortie = df.merge(data_typjour[['IDENT_IND', 'TYPE_JOUR', 'RETDOM']],
                         left_on='IDENT_IND',
                         right_on='IDENT_IND',
                         how='left')
    # print(data_typjour[data_typjour['TYPE_JOUR'] == ''])
    for typ in typjour:
        if 'loisirs' in typ:
            dat = data_typjour[data_typjour['TYPE_JOUR'] == typ]['RETDOM']
            taux_retour = dat.sum() / dat[dat == 0].count()
            dic_retourdom[typ] = pg.DiscreteDistribution(
                dict(zip([1, 0], [taux_retour, 1 - taux_retour])))
    return profil_mob, dic_nblois, dic_tranchlois, dic_parklois, dic_dureelois, df_sortie
def __init__(self, map_manager, agent_type, sampling=False):
    self.__map_manager = map_manager
    self.__position = None
    self.__percept = None
    self.__action_map = None
    self.__target_threshold = 0.3
    self.__max_prob_dir = None
    # target dot products
    self.__target_dots = [None] * action.Action.num_actions
    # Random variables marked as true will be considered in the bayesian net
    self._considered = {
        'target': True,
        'danger': True,
        'obstruction': True,
        'visibility': True,
        'hider': True,
        'seeker': True,
        'blockage': True
    }
    if agent_type == agent.AgentType.Seeker:
        self._considered['danger'] = False
    self.__sampling = sampling
    # Probability distributions
    self.__d_direction = [
        pm.DiscreteDistribution({
            'T': 0.5,
            'F': 0.5
        }) for i in range(action.Action.num_actions)
    ]
    self.__s_direction = [
        pm.State(self.__d_direction[i], name='direction_' + str(i))
        for i in range(action.Action.num_actions)
    ]
    # Random vars, probability distributions and state vars of considered variables
    # in the bayesian net
    if self._considered['target']:
        self.__r_target = [None] * action.Action.num_actions
        self.__d_target = [None] * action.Action.num_actions
        self.__s_target = None
    if self._considered['danger']:
        self.__r_danger = [None] * action.Action.num_actions
        self.__d_danger = [
            pm.ConditionalProbabilityTable(
                [['T', '0', 0.99], ['T', '1', 0.01], ['F', '0', 0.5],
                 ['F', '1', 0.5]], [self.__d_direction[i]])
            for i in range(action.Action.num_actions)
        ]
        self.__s_danger = [
            pm.State(self.__d_danger[i], name='danger_' + str(i))
            for i in range(action.Action.num_actions)
        ]
    if self._considered['obstruction']:
        self.__r_obstruction = [None] * action.Action.num_actions
        self.__d_obstruction = [
            pm.ConditionalProbabilityTable(
                [['T', '0', 0.001], ['T', '1', 0.003], ['T', '2', 0.006],
                 ['T', '3', 0.99], ['F', '0', 1. / 4], ['F', '1', 1. / 4],
                 ['F', '2', 1. / 4], ['F', '3', 1. / 4]],
                [self.__d_direction[i]])
            for i in range(action.Action.num_actions)
        ]
        self.__s_obstruction = [
            pm.State(self.__d_obstruction[i], name='obstruction_' + str(i))
            for i in range(action.Action.num_actions)
        ]
    if self._considered['visibility']:
        self.__r_visibility = [None] * action.Action.num_actions
        self.__d_visibility = [
            pm.ConditionalProbabilityTable(
                [['T', '0', 0.001], ['T', '1', 0.003], ['T', '2', 0.006],
                 ['T', '3', 0.99], ['F', '0', 1. / 4], ['F', '1', 1. / 4],
                 ['F', '2', 1. / 4], ['F', '3', 1. / 4]],
                [self.__d_direction[i]])
            for i in range(action.Action.num_actions)
        ]
        self.__s_visibility = [
            pm.State(self.__d_visibility[i], name='visibility_' + str(i))
            for i in range(action.Action.num_actions)
        ]
    cpt_a = [['T', '0', 0.9], ['T', '1', 0.066], ['T', '2', 0.033],
             ['F', '0', 1. / 3], ['F', '1', 1. / 3], ['F', '2', 1. / 3]]
    cpt_b = [['T', '0', 0.9], ['T', '1', 0.077], ['T', '2', 0.022],
             ['F', '0', 1. / 3], ['F', '1', 1. / 3], ['F', '2', 1. / 3]]
    target_cpt = None
    if self._considered['hider']:
        if agent_type == agent.AgentType.Hider:
            target_cpt = cpt_a
        elif agent_type == agent.AgentType.Seeker:
            target_cpt = cpt_b
        self.__r_hider = [None] * action.Action.num_actions
        self.__d_hider = [
            pm.ConditionalProbabilityTable(target_cpt, [self.__d_direction[i]])
            for i in range(action.Action.num_actions)
        ]
        self.__s_hider = [
            pm.State(self.__d_hider[i], name='hider_' + str(i))
            for i in range(action.Action.num_actions)
        ]
    if self._considered['seeker']:
        if agent_type == agent.AgentType.Hider:
            target_cpt = cpt_b
        elif agent_type == agent.AgentType.Seeker:
            target_cpt = cpt_a
        self.__r_seeker = [None] * action.Action.num_actions
        self.__d_seeker = [
            pm.ConditionalProbabilityTable(target_cpt, [self.__d_direction[i]])
            for i in range(action.Action.num_actions)
        ]
        self.__s_seeker = [
            pm.State(self.__d_seeker[i], name='seeker_' + str(i))
            for i in range(action.Action.num_actions)
        ]
    if self._considered['blockage']:
        self.__r_blockage = [None] * action.Action.num_actions
        self.__d_blockage = [
            pm.ConditionalProbabilityTable(
                [['T', '0', 0.999999], ['T', '1', 0.000001], ['F', '0', 0.5],
                 ['F', '1', 0.5]], [self.__d_direction[i]])
            for i in range(action.Action.num_actions)
        ]
        self.__s_blockage = [
            pm.State(self.__d_blockage[i], name='blockage_' + str(i))
            for i in range(action.Action.num_actions)
        ]
    # State objects (for the pomegranate library) which hold both the distribution and the name
    self.__model = None
    self.__inferred_results = None
    self.__direction_probs = [None] * action.Action.num_actions
    self.__direction_dist = None
def get_discrete_distribution(self):
    out = pg.DiscreteDistribution(self._rows.get_dict())
    return out
def export_pom(self):
    '''
    Returns
    -------
    pomegranate BN Model based on given DAG.

    Assumes my "sort" function correctly returns a list where children are always
    ranked higher than parents.
    '''
    s = self.sort_nodes(l=list(self.nds.values()))
    model = pm.BayesianNetwork("DIY_GRN")
    # Convert top-level nodes to discrete distributions
    top = [i for i in s if len(i.par) == 0]
    topStates = {}
    for n in top:
        pr = n.cpt['Prob'].to_dict()
        va = n.cpt[n.idx].to_dict()
        dist = {}
        for v in va.keys():
            dist[va[v]] = pr[v]
        dist = pm.DiscreteDistribution(dist)
        state = pm.Node(dist, name="G" + str(n.idx))
        topStates["G" + str(n.idx)] = state
        model.add_state(state)
    # Convert dependent nodes to conditional distributions
    dep = [i for i in s if len(i.par) != 0]
    depStates = {}
    for n in dep:
        # Convert float cpt outcome levels to integers if needed
        if isinstance(n.cpt.iloc[0, 0], np.int64):
            cpt = [fl(l) for l in n.cpt.values.tolist()]
        else:
            cpt = n.cpt.values.tolist()
        # Vector of IDs for each parent
        par_id = ["G" + str(i.idx) for i in n.par]
        # Validate that all parents have been processed
        for p in par_id:
            if (not p in topStates.keys()) and (not p in depStates.keys()):
                print("Problem with parent:", p, "of node:", n.idx)
                return [topStates, depStates]
        # Get all parents found in the topStates dict
        par = [topStates[i] for i in par_id if i in topStates.keys()]
        # Add all parents in the depStates dict
        par = par + [depStates[i] for i in par_id if i in depStates.keys()]
        cpt = pm.ConditionalProbabilityTable(cpt, [p.distribution for p in par])
        state = pm.Node(cpt, name="G" + str(n.idx))
        depStates["G" + str(n.idx)] = state
        # Add node to model
        model.add_state(state)
        # Add edges from parent to this node
        for p in par:
            model.add_edge(p, state)
    # Assemble and "bake" model
    model.bake()
    return (topStates, depStates, model)
# Evaluate the typical day with map filter
data_journee = data_filt.copy()
data_journee.loc[:, 'V2_MORIHDEP'] = pd.to_timedelta(data_journee['V2_MORIHDEP'],
                                                     errors='coerce')
data_journee['MOTIF_HDEP'] = list(
    zip(data_journee['V2_MMOTIFDES'], data_journee['V2_MORIHDEP']))
data_journee = data_journee.sort_values(['IDENT_IND', 'V2_MORIHDEP']).groupby(
    ['IDENT_IND'])['MOTIF_HDEP'].apply(lambda x: list(x)).reset_index()
nb_profil_mob = [
    len(data_journee[data_journee['MOTIF_HDEP'].map(f)]['MOTIF_HDEP'].values)
    for name, f in fb.__dict__.items() if callable(f)
]
nb_profil_mob = [x / sum(nb_profil_mob) for x in nb_profil_mob]
profil_mob = pg.DiscreteDistribution(
    dict(
        zip([name for name, f in fb.__dict__.items() if callable(f)],
            nb_profil_mob)))

li_prob = []
(trav_matin_dist, trav_matin_quartdist) = pm.probparam(data_filt, 5, 11, 9)
(dom_midi_dist, dom_midi_quartdist) = pm.probparam(data_filt, 11, 14, 1)
(trav_aprem_dist, trav_aprem_quartdist) = pm.probparam(data_filt, 12, 15, 9)
(dom_soir_dist, dom_soir_quartdist) = pm.probparam(data_filt, 15, 21, 1)
li_prob.append(pm.probparam(data_filt, 5, 11, 9))
li_prob.append(pm.probparam(data_filt, 11, 14, 1))
li_prob.append(pm.probparam(data_filt, 12, 15, 9))
li_prob.append(pm.probparam(data_filt, 15, 21, 1))
B = np.array([[1, 0], [0.8, 0.2], [0, 1]])  # temporary emission matrix
dicoObs = {
    'sun': 0,
    'rain': 1
}  # to convert the strings to integers (0 and 1)
dicoState = {'c.sky': 0, 'cloudy': 1, 'v.cloudy': 2}

## Build the Markov chain
model = pg.HiddenMarkovModel(name="partie 2")  # create the instance

# Emission matrix
# clear-sky state and its emission probabilities
sunny = pg.State(pg.DiscreteDistribution({
    'sun': B[0, 0],
    'rain': B[0, 1]
}), name='c.sky')
# cloudy state and its emission probabilities
cloudy = pg.State(pg.DiscreteDistribution({
    'sun': B[1, 0],
    'rain': B[1, 1]
}), name='cloudy')
# very cloudy state and its emission probabilities
v_cloudy = pg.State(pg.DiscreteDistribution({
    'sun': B[2, 0],
    'rain': B[2, 1]
}),
start_probability = np.array([0.5, 0.5])
T = np.array([[0.6, 0.4], [0.3, 0.7]])  # temporary transition matrix
B = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]])  # temporary emission matrix
dicoObs = {'fine': 0, 'moyenne': 1, 'epaisse': 2}  # to convert the strings to integers (0, 1 and 2)
dicoState = {'cold': 0, 'hot': 1}

## Build the Markov chain
model = pg.HiddenMarkovModel(name="partie 3")  # create the instance

# Emission matrix
# cold state and its emission probabilities
cold = pg.State(pg.DiscreteDistribution({
    'fine': B[0, 0],
    'moyenne': B[0, 1],
    'epaisse': B[0, 2]
}), name='cold')
# hot state and its emission probabilities
hot = pg.State(pg.DiscreteDistribution({
    'fine': B[1, 0],
    'moyenne': B[1, 1],
    'epaisse': B[1, 2]
}), name='hot')

# Transition matrix
model.add_transitions(model.start, [cold, hot], [0.5, 0.5])  # initial probabilities
model.add_transitions(cold, [cold, hot], [T[0, 0], T[0, 1]])  # transitions from cold
model.add_transitions(hot, [cold, hot], [T[1, 0], T[1, 1]])  # transitions from hot
model.add_transition(model.start, cold, start_probability[0])
model.add_transition(model.start, hot, start_probability[1])
def decode_sequence(probs=None,
                    algorithm='threshold',
                    params=dict(n=5, t=.8),
                    verbose=True):
    '''
    Once a model outputs probabilities for some sequence of data, that data shall be
    passed to this method. This method will use various ways to decode an underlying
    sequence in order to determine where the *actual* canned laughter was.

    possible algorithms to decode sequence:

    - 'neural' surround-n-gram neural network: this method will use a pretrained
      Keras model to label some sample i using the multiclass probabilities of all
      of the samples numbered [i-n, i-n+1, ... i, i+1, ..., i+n], i.e., n before
      and n afterwards.

    - 'hmm' HMM: this method will use a hidden Markov model with underlying states
      that are the same as surface states (the two state spaces for hidden and
      observed are equivalent). uses Viterbi to decode the underlying state
      sequence. requires params to be passed as dict(c=DiscreteDistribution) where
      c is a class (label) and DiscreteDistribution is an instance of emission
      probabilities created using `pomegranate`, for each such class c (0, 1, 2, ...)

    - 'threshold' window and threshold method: this is a simple heuristic-based
      method that will observe windows of length n, and if the average probability
      of any single class is at least t, it will assign that same class to all of
      the samples in that window. imagine a threshold of 0.9; then it is intuitively
      likely that if few of the samples are labeled with some other class, they may
      have been accidentally so-labeled.

    - 'modethreshold' like 'threshold' but instead of considering avg probability,
      it considers what percentage of labels are a particular class and if that
      surpasses a threshold, then all labels are made that same label

    ---
    probs: an nparray of (n_samples, n_classes) probabilities such that for each
    sample, the sum of probabilities across classes adds up to 1. In case the
    supplied array is of shape (n_samples,) it will be converted to multiclass
    using this module's _binary_probs_to_multiclass method

    return: a list of len n_samples, with the ith sample being the predicted label
    of that sample. this prediction would usually also incorporate somehow the
    samples before and after the current sample
    '''
    color.INFO('INFO', 'shape of input probs is: {}'.format(probs.shape))
    if probs.shape[-1] == 1:
        probs = _binary_probs_to_multiclass(probs)
    color.INFO('INFO', 'received probs of shape {}'.format(str(probs.shape)))

    if algorithm == 'threshold':
        n, t = params['n'], params['t']
        labels = [np.argmax(timechunk) for timechunk in probs]
        for i in range(len(probs) - n + 1):
            # print(np.average(probs[i:i+n], axis=0)[0],
            #       np.average(probs[i:i+n], axis=0)[1])
            for c in range(probs.shape[-1]):
                avg = np.average(probs[i:i + n], axis=0)[c]
                if avg >= t:
                    # color.INFO('DEBUG',
                    #            'found threshold window of {} at [{}:{}] for class {}'.format(avg, i, i+n, c))
                    labels[i:i + n] = [c for _ in range(n)]
        return labels

    elif algorithm == 'hmm' or algorithm == 'viterbi':
        # define default emission probabilities
        default = {
            0: pmgt.DiscreteDistribution({
                '0': 0.7,
                '1': 0.3
            }),
            1: pmgt.DiscreteDistribution({
                '0': 0.2,
                '1': 0.8
            })
        }
        states = []
        for c in [*range(probs.shape[-1])]:
            state = pmgt.State(params.get(c, default[c]), name=str(c))
            states += [state]
        model = pmgt.HiddenMarkovModel('laugh-decoder')
        model.add_states(states)
        if 'transitions' in params:
            model.add_transitions(params['transitions'])
        else:
            # start must always go to state 0
            model.add_transitions([model.start, states[0]],
                                  [states[0], model.end], [1., .1])
            model.add_transitions([states[0], states[0], states[1], states[1]],
                                  [states[0], states[1], states[0], states[1]],
                                  [.5, .4, .2, .8])
        model.bake()
        # if verbose:
        #     model.plot()  # plotting is weird
        labels = [str(np.argmax(entry)) for entry in probs]
        labels = model.predict(sequence=labels, algorithm='viterbi')
        return labels[1:-1]

    else:
        raise NotImplementedError
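# Minimal illustration of the 'threshold' decoder above (a sketch; the probability
# rows are made up, and the module's `color` logging helper plus numpy as np are
# assumed available, as in the function itself).
probs = np.array([[0.9, 0.1], [0.85, 0.15], [0.2, 0.8],
                  [0.1, 0.9], [0.15, 0.85], [0.9, 0.1]])
labels = decode_sequence(probs=probs, algorithm='threshold', params=dict(n=2, t=0.8))
print(labels)   # per-sample class indices after window smoothing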
import matplotlib.pyplot as plt
import matplotlib.image
import itertools as it
import pomegranate as pom
import pygraphviz
import tempfile

F = 'Fraud'
T = 'Travel'
OD = 'OwnsDevice'
FP = 'ForeignPurchase'
OP = 'OnlinePurchase'

travelDist = pom.DiscreteDistribution({False: 0.05, True: 0.95})
foreignPurchaseDist = pom.ConditionalProbabilityTable(
    [[False, False, 0.9999], [False, True, 0.0001], [True, False, 0.12],
     [True, True, 0.88]], [travelDist])
ownsDeviceDist = pom.DiscreteDistribution({False: 0.3, True: 0.7})
onlinePurchaseDist = pom.ConditionalProbabilityTable(
    [[False, False, 0.9995], [False, True, 0.0005], [True, False, 0.60],
     [True, True, 0.40]], [ownsDeviceDist])
fraudDist = pom.ConditionalProbabilityTable(
    [[False, False, False, 0.25], [False, True, False, 0.15],
     [True, False, False, 0.20], [True, True, False, 0.0005],
     [False, False, True, 0.75], [False, True, True, 0.85],
     [True, False, True, 0.80], [True, True, True, 0.9995]],
    [onlinePurchaseDist, travelDist])
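# How these distributions would typically be assembled into a queryable network
# (a sketch using the standard pomegranate BayesianNetwork API; node names reuse the
# string constants defined above).
travel = pom.Node(travelDist, name=T)
foreignPurchase = pom.Node(foreignPurchaseDist, name=FP)
ownsDevice = pom.Node(ownsDeviceDist, name=OD)
onlinePurchase = pom.Node(onlinePurchaseDist, name=OP)
fraud = pom.Node(fraudDist, name=F)

network = pom.BayesianNetwork("Fraud detection")
network.add_states(travel, foreignPurchase, ownsDevice, onlinePurchase, fraud)
network.add_edge(travel, foreignPurchase)
network.add_edge(ownsDevice, onlinePurchase)
network.add_edge(onlinePurchase, fraud)
network.add_edge(travel, fraud)
network.bake()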
    return " ".join(state.name for idx, state in path[1:])


def idx_from_path(path):
    return [idx for idx, state in path[1:]]


# make a sequence
seq = [np.array(np.random.rand(100) > 0.2, dtype=int)]

model = hmm(nstates=2)

nstates = 2
states = [pmg.DiscreteDistribution({
    0: 0.5,
    1: 0.5
}) for i in range(nstates)]
trans = np.ones((nstates, nstates)) / nstates
trans = np.random.rand(nstates, nstates)
for i in range(nstates):
    trans[i] = trans[i] / trans[i].sum()
model = pmg.HiddenMarkovModel().from_matrix(trans, states,
                                            np.ones(nstates) / nstates,
                                            np.zeros(nstates))
model.plot()

print(model.fit(seq))

plt.figure(1)
plt.clf()
start_probability = np.array([1, 0, 0])
T = np.array([[0.5, 0.4, 0.1], [0.3, 0.4, 0.3], [0.1, 0.2, 0.7]])  # temporary transition matrix
B = np.array([[0.5, 0.5], [0.25, 0.75], [0.75, 0.25]])  # temporary emission matrix
dicoObs = {'pile': 0, 'face': 1}  # to convert the strings to integers (0 and 1)
dicoState = {'P1': 0, 'P2': 1, 'P3': 2}

## Build the Markov chain
model = pg.HiddenMarkovModel(name="partie 5")  # create the instance

# Emission matrix: one state per coin with its emission probabilities
p1 = pg.State(pg.DiscreteDistribution({'pile': B[0, 0], 'face': B[0, 1]}), name='P1')
p2 = pg.State(pg.DiscreteDistribution({'pile': B[1, 0], 'face': B[1, 1]}), name='P2')
p3 = pg.State(pg.DiscreteDistribution({'pile': B[2, 0], 'face': B[2, 1]}), name='P3')

# Transition matrix
model.add_transitions(model.start, [p1, p2, p3], [1, 0, 0])  # initial probabilities
model.add_transitions(p1, [p1, p2, p3], [T[0, 0], T[0, 1], T[0, 2]])  # transitions from P1
model.add_transitions(p2, [p1, p2, p3], [T[1, 0], T[1, 1], T[1, 2]])  # transitions from P2
model.add_transitions(p3, [p1, p2, p3], [T[2, 0], T[2, 1], T[2, 2]])  # transitions from P3
def get_root_state(self):
    if len(self._parents) != 0:
        raise ValueError('Not a root state')
    p = pg.DiscreteDistribution(self._rows.get_dict())
    out = pg.State(p, name=self._name)
    return out