def getCategoryDict():
    """Return a new dictionary mapping every category to a zero count.

    The categories are obtained from ``encod.get_categories`` applied to the
    ``voc_etats`` file located under ``data``.
    """
    # The table of categories.
    categories = encod.get_categories(data + "/voc_etats")
    return {category: 0 for category in categories}
def getCategoryDict():
    """Create a dictionary keyed by category, every value set to 0."""
    # The category table is read through encod from the "voc_etats" file
    # under ``data``; each category becomes a key with an initial count of 0.
    return dict.fromkeys(encod.get_categories(data + "/voc_etats"), 0)
def initHmm(self, listObservables):
    """Initialise the HMM with uniform probabilities.

    The states are loaded with ``encod.get_categories`` from the
    ``voc_etats`` file under ``data``. ``self.Pi``, ``self.transitions`` and
    ``self.emissions`` are then filled so that every entry equals
    ``1 / number_of_states``.

    Args:
        listObservables: iterable of observables; each one becomes a key of
            ``self.emissions``.

    Returns:
        The list ``[self.states, self.transitions, self.emissions]``
        (``self.Pi`` is set on the instance but is not part of the list).
    """
    self.states = encod.get_categories(data + "/voc_etats")
    states = self.states

    # The division is kept inside the comprehensions so that nothing is
    # computed when there are no states at all.
    self.Pi = {state: 1.0 / len(states) for state in states}
    self.transitions = {
        source: {target: 1.0 / len(states) for target in states}
        for source in states
    }
    self.emissions = {
        observable: {state: 1.0 / len(states) for state in states}
        for observable in listObservables
    }
    return [self.states, self.transitions, self.emissions]
def initHmm(self, listObservables):
    """Initialise the starting HMM (lambda_0).

    Performs the first computation of the initial, transition and emission
    probabilities, using equiprobability everywhere: every entry is
    ``1 / number_of_states``. The states come from ``encod.get_categories``
    applied to the ``voc_etats`` file under ``data``.

    Returns the list ``[self.states, self.transitions, self.emissions]``.
    """
    self.states = encod.get_categories(data + "/voc_etats")

    def uniform_over_states():
        # A fresh dict per call, so that no two rows share storage.
        return {state: 1.0 / len(self.states) for state in self.states}

    # Initial probabilities.
    self.Pi = uniform_over_states()

    # Transition probabilities: one uniform row per source state.
    self.transitions = {}
    for state in self.states:
        self.transitions[state] = uniform_over_states()

    # Emission probabilities: one uniform row per observable.
    self.emissions = {}
    for observable in listObservables:
        self.emissions[observable] = uniform_over_states()

    return [self.states, self.transitions, self.emissions]
def get_Pi_T_E():
    """Estimate initial, transition and emission probabilities by counting.

    The observable vocabulary (``voc_observables``) and the category
    vocabulary (``voc_etats``) are loaded through ``encod`` from the ``data``
    directory. The ``train`` file in the same directory is then scanned: each
    non-blank line is read as ``observable<TAB>category`` and a blank line
    marks the end of a sentence.

    Returns:
        A list ``[I, T, E]`` of dictionaries where
        ``I[c]`` is the share of sentences whose first category is ``c``,
        ``T[c1][c2]`` is the number of times ``c2`` directly follows ``c1``
        inside a sentence divided by the total number of occurrences of
        ``c1``, and ``E[m][c]`` is the number of times observable ``m`` is
        tagged ``c`` divided by the total number of occurrences of ``c``.
        A ratio whose denominator is zero (category never seen, or empty
        training file) is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.

    Raises:
        KeyError: if a training line holds an observable or a category that
            is not a key of the count tables built from the vocabularies.
        IndexError: if a non-blank training line contains no tab.
    """
    # The table of observables and the table of categories.
    observables = encod.encode(data + "/voc_observables")
    categories = encod.get_categories(data + "/voc_etats")

    # Number of sentence-initial occurrences of each category.
    initial = dict.fromkeys(categories, 0)
    # Total number of occurrences of each category.
    occur_cat = dict.fromkeys(categories, 0)
    # Bigram counts: transitions[c1][c2] = number of times c2 follows c1.
    # Rows are built from the category list already loaded above, which
    # avoids re-reading the category file once per row.
    transitions = {c: dict.fromkeys(categories, 0) for c in categories}
    # Emission counts: emissions[m][c] = number of times m is tagged c.
    emissions = {m: dict.fromkeys(categories, 0) for m in observables}

    start_sentence = True
    categorie_before = ""
    # The context manager guarantees the training file is closed, even if a
    # malformed line raises.
    with open(data + "/train") as file_app:
        for line in file_app:
            if line == "\n":
                # A blank line separates two sentences.
                start_sentence = True
                continue

            fields = line.split("\t")
            observable = fields[0]
            categorie = fields[1].replace("\n", "")

            emissions[observable][categorie] += 1
            occur_cat[categorie] += 1

            if start_sentence:
                initial[categorie] += 1
                start_sentence = False
            else:
                transitions[categorie_before][categorie] += 1
            categorie_before = categorie

    def _ratio(count, total):
        # float() keeps true division under Python 2; a zero total yields 0.0.
        return float(count) / total if total else 0.0

    # Pi(c): normalise by the number of sentences.
    sentence_count = sum(initial.values())
    for c in initial:
        initial[c] = _ratio(initial[c], sentence_count)

    # T(c1, c2): normalise by the occurrences of the preceding category.
    for c1 in transitions:
        row = transitions[c1]
        for c2 in row:
            row[c2] = _ratio(row[c2], occur_cat[c1])

    # E(m, c): normalise by the occurrences of the category.
    for m in emissions:
        row = emissions[m]
        for c in row:
            row[c] = _ratio(row[c], occur_cat[c])

    return [initial, transitions, emissions]
f = open(data+"/BWtest") for line in f: l = [] if line != "\n": l = line.replace("\n", "").split("\t") else: l = ["", ""] t = [l[0], l[1], ""] T.append(t) return T listState = encod.get_categories(data+"/voc_etats") listState.append("") test_table = encodeTestAsMatrix3n() listObservables = range(len(test_table)) for k in range(len(test_table)): listObservables[k] = test_table[k][0] ### On construit un hmm avec le fichier d'apprentissage ### S = app.get_Pi_T_E() I = S[0] T = S[1] E = S[2] if perturbation: for obs in listObservables: