def Train_Model(self):
    """Fit the model on the complete training set and persist it.

    The training data comes from ``self.GetTraingData()``; the features to
    keep are chosen by ``Util_Feature_Extraction.GetFeature``.  Every kept
    feature gets a two-slot counter ``[count for label '1', count for
    label '0']``; both slots and both class totals start from
    ``self.E_p_base``.  Occurrences of kept features are then counted per
    label and the result is handed to ``self.StoreModel``.
    """
    print("Now, It is training all of this set!")
    extractor = FeatureExtraction.Util_Feature_Extraction()
    samples = list(self.GetTraingData())

    kept_features = extractor.GetFeature(
        samples, self.NumOfFeature, self.OutputFeature, self.OutputAllFeature)

    total_label0 = 0.0
    total_label1 = 0.0
    counters = {}

    # Seed the class totals and the per-feature counters with the base value.
    for feature in kept_features:
        base = self.E_p_base
        total_label0 += base
        total_label1 += base
        counters[feature] = [base, base]

    # Count kept-feature occurrences; the first column of a sample is its label.
    for sample in samples:
        label = sample[0]
        if label == '0':
            for token in sample[1:]:
                if token not in counters:
                    continue
                total_label0 += 1
                counters[token][1] += 1
        elif label == '1':
            for token in sample[1:]:
                if token not in counters:
                    continue
                total_label1 += 1
                counters[token][0] += 1

    self.StoreModel(total_label0, total_label1, counters)
    print("Training all the set is down, The model is kept in the " + self.ModelFile)
def Training(self, fold):
    """Evaluate the classifier with ``fold``-fold cross-validation.

    For each fold the data from ``self.GetTraingData()`` is split into a
    held-out slice and the remaining training samples.  Feature counters
    are built from the training samples exactly as in ``Train_Model``
    (``Pfeature[f]`` is ``[count for label '1', count for label '0']``),
    every held-out sample is scored with base-10 log scores for both
    labels, and accuracy / precision / recall / F1 are printed.  After the
    last fold the metrics averaged over all ``fold`` runs are printed.

    Args:
        fold: Number of folds; must be at least 1.

    Raises:
        ValueError: If ``fold`` is smaller than 1.

    Note:
        The process is terminated via ``exit()`` when either score of a
        held-out sample is exactly 0.
    """
    if fold < 1:
        raise ValueError("fold must be a positive integer, got " + repr(fold))

    print("Evaluate the model by " + str(fold) + "-fold(it will cost some time to training the model)......\n")
    util_feature = FeatureExtraction.Util_Feature_Extraction()
    train = self.GetTraingData()
    sample_count = len(train)

    accuracy = 0.0
    # NOTE(review): these start as lists; `+=` with the per-class arrays that
    # scikit-learn presumably returns yields an element-wise NumPy sum - confirm.
    percision = [0.0, 0.0]
    recall = [0.0, 0.0]
    F1 = [0.0, 0.0]

    for i in range(fold):
        # Boundaries of the held-out slice for this fold.
        start = int(i * sample_count / fold)
        stop = int((i + 1) * sample_count / fold)
        tmp_train = [*train[:start], *train[stop:]]
        held_out = train[start:stop]

        tmp_feature = util_feature.GetFeature(
            tmp_train, self.NumOfFeature, self.OutputFeature, self.OutputAllFeature)

        # Seed class totals and per-feature counters with the base value.
        Py_0 = 0.0
        Py_1 = 0.0
        Pfeature = {}
        for f in tmp_feature:
            Py_0 += self.E_p_base
            Py_1 += self.E_p_base
            Pfeature[f] = [self.E_p_base, self.E_p_base]

        # Count kept-feature occurrences per label (first column is the label).
        for line in tmp_train:
            if line[0] == '0':
                for col in line[1:]:
                    if col in Pfeature:
                        Py_0 += 1
                        Pfeature[col][1] += 1
            elif line[0] == '1':
                for col in line[1:]:
                    if col in Pfeature:
                        Py_1 += 1
                        Pfeature[col][0] += 1

        print(str(i + 1) + "th training is down! \n")

        Evaluation_Result = []
        Result = []
        for line in held_out:
            # Build the membership set once per sample instead of re-slicing
            # and scanning the sample for every feature.
            # Assumes each sample is a sequence of hashable tokens - the
            # counting loop above already uses them as dict keys.
            tokens = set(line[1:])

            current_P1 = math.log(Py_1 / (Py_0 + Py_1), 10)
            current_P0 = math.log(Py_0 / (Py_0 + Py_1), 10)
            for f, counts in Pfeature.items():
                if f in tokens:
                    current_P1 += math.log(1.25 * counts[0] / Py_1, 10)
                    current_P0 += math.log(2 * counts[1] / Py_0, 10)

            if current_P1 == 0 or current_P0 == 0:
                print("equal to 0 --->error")
                exit()

            # Ties are resolved in favour of label 1.
            Evaluation_Result.append(1 if current_P1 >= current_P0 else 0)

            if line[0] == '1':
                Result.append(1)
            elif line[0] == '0':
                Result.append(0)

        Evaluation = precision_recall_fscore_support(Result, Evaluation_Result, pos_label=1)
        Accuracy = accuracy_score(Result, Evaluation_Result)

        print("Accuracy OF " + str(i + 1) + "th training is " + str(Accuracy))
        print("Percision OF " + str(i + 1) + "th training is " + str(Evaluation[0]))
        print("Recall OF " + str(i + 1) + "th training is " + str(Evaluation[1]))
        print("F1 OF " + str(i + 1) + "th training is " + str(Evaluation[2]) + '\n\n')

        percision += Evaluation[0]
        recall += Evaluation[1]
        F1 += Evaluation[2]
        accuracy += Accuracy

    # Average over the number of folds actually run (was hard-coded to 5).
    print("Now Get The Final Evaluation Of This Model:")
    print("Accuracy is " + str(accuracy / fold) + "\n")
    print("percision is " + str(percision / fold))
    print("recall is " + str(recall / fold))
    print("F1 is " + str(F1 / fold) + "\n")