Esempio n. 1
0
    def Train_Model(self):
        """Build the model from the complete training set and store it.

        Features are selected with ``Util_Feature_Extraction.GetFeature``.
        Every selected feature starts with a count of ``self.E_p_base`` for
        both labels, and the same amount is added to both label totals.
        Each occurrence of a selected feature in a row labelled ``'0'`` or
        ``'1'`` then adds one to the matching feature count and label total.
        The totals and the feature table are handed to ``self.StoreModel``.
        """
        print("Now, It is training all of this set!")
        extractor = FeatureExtraction.Util_Feature_Extraction()

        # Materialise the rows once: they are consumed by the feature
        # selection and walked again for counting.
        rows = list(self.GetTraingData())

        selected = extractor.GetFeature(rows, self.NumOfFeature,
                                        self.OutputFeature,
                                        self.OutputAllFeature)

        # Both containers share one layout: slot 0 belongs to label '1',
        # slot 1 belongs to label '0'.
        label_totals = [0.0, 0.0]
        feature_counts = {}
        for feature in selected:
            label_totals[1] += self.E_p_base
            label_totals[0] += self.E_p_base
            feature_counts[feature] = [self.E_p_base, self.E_p_base]

        for row in rows:
            label = row[0]
            if label == '0':
                slot = 1
            elif label == '1':
                slot = 0
            else:
                # Rows carrying any other label do not contribute.
                continue

            for token in row[1:]:
                # Tokens that were not selected as features are ignored.
                if token not in feature_counts:
                    continue
                label_totals[slot] += 1
                feature_counts[token][slot] += 1

        self.StoreModel(label_totals[1], label_totals[0], feature_counts)
        print("Training all the set is down, The model is kept in the " +
              self.ModelFile)
Esempio n. 2
0
    def Training(self, fold):
        """Evaluate the classifier with ``fold``-fold cross-validation.

        The training data is cut into ``fold`` contiguous slices. For each
        slice, the smoothed feature counts are rebuilt from the remaining
        rows, every row of the held-out slice is classified by comparing
        base-10 log scores for the two labels, and accuracy, precision,
        recall and F1 for that fold are printed. The metrics averaged over
        all folds are printed at the end.

        Args:
            fold: Number of folds; must be at least 1.

        Raises:
            ValueError: If ``fold`` is smaller than 1.
        """
        if fold < 1:
            raise ValueError("fold must be at least 1, got " + str(fold))

        print("Evaluate the model by " + str(fold) +
              "-fold(it will cost some time to training the model)......\n")
        util_feature = FeatureExtraction.Util_Feature_Extraction()

        train = self.GetTraingData()
        total_rows = len(train)
        accuracy = 0.0
        # Running sums of the per-fold metrics; averaged after the loop.
        precision_sum = [0.0, 0.0]
        recall_sum = [0.0, 0.0]
        f1_sum = [0.0, 0.0]

        for i in range(fold):
            # Boundaries of the slice held out for evaluation in this fold.
            held_out_start = int(i * total_rows / fold)
            held_out_end = int((i + 1) * total_rows / fold)

            # Train on everything before and after the held-out slice.
            tmp_train = list(train[:held_out_start])
            tmp_train.extend(train[held_out_end:])

            tmp_feature = util_feature.GetFeature(tmp_train, self.NumOfFeature,
                                                  self.OutputFeature,
                                                  self.OutputAllFeature)

            # Every selected feature starts with the base count for both
            # labels; Pfeature[f] is [count for label '1', count for '0'].
            Py_0 = 0.0
            Py_1 = 0.0
            Pfeature = {}
            for f in tmp_feature:
                Py_0 += self.E_p_base
                Py_1 += self.E_p_base
                Pfeature[f] = [self.E_p_base, self.E_p_base]

            for line in tmp_train:
                if line[0] == '0':
                    for col in line[1:]:
                        if col in Pfeature:
                            Py_0 += 1
                            Pfeature[col][1] += 1
                if line[0] == '1':
                    for col in line[1:]:
                        if col in Pfeature:
                            Py_1 += 1
                            Pfeature[col][0] += 1

            print(str(i + 1) + "th training is down! \n")

            Evaluation_Result = []  # predicted labels for the held-out rows
            Result = []  # true labels for the held-out rows
            for line in train[held_out_start:held_out_end]:
                current_P1 = math.log((Py_1 / (Py_0 + Py_1)), 10)
                current_P0 = math.log((Py_0 / (Py_0 + Py_1)), 10)

                # Slice once per row instead of once per feature.
                cols = line[1:]
                for f in Pfeature:
                    if f in cols:
                        # NOTE(review): the 1.25 and 2 multipliers are
                        # unexplained per-label weights -- confirm origin.
                        current_P1 += math.log(1.25 * Pfeature[f][0] / Py_1,
                                               10)
                        current_P0 += math.log(2 * Pfeature[f][1] / Py_0, 10)

                # A log score of exactly 0 is treated as a fatal error and
                # stops the whole program.
                if (current_P1 == 0 or current_P0 == 0):
                    print("equal to 0 --->error")
                    exit()

                # Ties are resolved in favour of label 1.
                if current_P1 >= current_P0:
                    Evaluation_Result.append(1)
                else:
                    Evaluation_Result.append(0)

                if line[0] == '1':
                    Result.append(1)
                elif line[0] == '0':
                    Result.append(0)

            # Evaluation is indexed as [0] precision, [1] recall, [2] F1.
            # Presumably per-label arrays, since no averaging mode is
            # requested -- TODO confirm against the scikit-learn version used.
            Evaluation = precision_recall_fscore_support(Result,
                                                         Evaluation_Result,
                                                         pos_label=1)
            Accuracy = accuracy_score(Result, Evaluation_Result)

            print("Accuracy OF " + str(i + 1) + "th training is " +
                  str(Accuracy))
            print("Percision OF " + str(i + 1) + "th training is " +
                  str(Evaluation[0]))
            print("Recall OF " + str(i + 1) + "th training is " +
                  str(Evaluation[1]))
            print("F1 OF " + str(i + 1) + "th training is " +
                  str(Evaluation[2]) + '\n\n')

            precision_sum += Evaluation[0]
            recall_sum += Evaluation[1]
            f1_sum += Evaluation[2]
            accuracy += Accuracy

        # Average over the number of folds actually run (previously a
        # hard-coded 5, which was wrong for any other fold count).
        print("Now Get The Final Evaluation Of This Model:")
        print("Accuracy is " + str(accuracy / fold) + "\n")
        print("percision is " + str(precision_sum / fold))
        print("recall is " + str(recall_sum / fold))
        print("F1 is " + str(f1_sum / fold) + "\n")