Beispiel #1
0
    def pruneTree(self):
        """Randomly prune the tree and keep the variant that validates best.

        Runs a fixed number of trials. Each trial deep-copies the best tree
        found so far, turns up to ``self.nodeNum`` randomly selected nodes
        into leaves, and asks a validation handler built from
        ``self.filepath`` for the resulting accuracy. A trial is accepted
        only when its accuracy is strictly higher than the best seen so far.

        Side effects:
            * ``self.maxAttr`` -- attribute names (looked up in
              ``self.tree.attrset``) of the nodes cut in the accepted trials,
              i.e. the cuts that are actually present in the final tree. It
              is empty when no trial beat the unpruned tree.
            * ``self.maxAccuracy`` -- set whenever a trial is accepted.
            * ``self.tree.tree`` and ``self.treeroot`` -- replaced by the
              best tree found.
        """
        # number of random pruning trials
        stop = 100

        # record attributes about max accuracy
        self.maxAttr = []

        # start from the original tree; nothing has been cut yet
        bestTree = self.treeroot
        bestPruned = []

        # one handler scores the original tree, the other is re-pointed at
        # each trial tree
        bestvalidat = validation(self.filepath, bestTree)
        validat = validation(self.filepath, None)

        # accuracy of the unpruned tree is the bar every trial has to beat
        maxaccu = bestvalidat.calculateAccuracy()

        for _trial in range(stop):

            # work on a copy so a rejected trial leaves bestTree untouched
            currentTree = copy.deepcopy(bestTree)
            validat.setTreeRoot(currentTree)

            # nodes cut during this trial only
            pruned = []

            for _cut in range(self.nodeNum):

                # refresh self.treeList from the (already partly cut) tree
                # NOTE(review): presumably treeToList fills self.treeList --
                # confirm against its definition
                self.treeToList(currentTree)

                # stop cutting once the tree is down to three nodes or fewer
                if len(self.treeList) <= 3:
                    break

                # index of the node to cut, as chosen by the generator
                j = self.randomNumberGenerator()
                node = self.treeList[j]
                node.isLeaf = 1
                node.left = None
                node.right = None
                pruned.append(node)

            newaccu = validat.calculateAccuracy()
            if newaccu > maxaccu:
                self.maxAccuracy = newaccu
                maxaccu = newaccu
                bestTree = currentTree
                # The accepted tree was copied from the previous best, so it
                # carries the earlier accepted cuts plus this trial's cuts.
                bestPruned = bestPruned + pruned

        # Report only cuts that belong to the returned tree. The previous
        # code used the last trial's list even when that trial was rejected.
        self.maxAttr = [self.tree.attrset[node.attr] for node in bestPruned]

        # reset the original training tree to best tree
        self.tree.tree = bestTree
        self.treeroot = bestTree
Beispiel #2
0
    def pruneTree(self):
        """Randomly prune the tree and keep the variant that validates best.

        Runs a fixed number of trials. Each trial deep-copies the best tree
        found so far, turns up to ``self.nodeNum`` randomly selected nodes
        into leaves, and asks a validation handler built from
        ``self.filepath`` for the resulting accuracy. A trial is accepted
        only when its accuracy is strictly higher than the best seen so far.

        Side effects:
            * ``self.maxAttr`` -- attribute names (looked up in
              ``self.tree.attrset``) of the nodes cut in the accepted trials,
              i.e. the cuts that are actually present in the final tree. It
              is empty when no trial beat the unpruned tree.
            * ``self.maxAccuracy`` -- set whenever a trial is accepted.
            * ``self.tree.tree`` and ``self.treeroot`` -- replaced by the
              best tree found.
        """
        # number of random pruning trials
        stop = 100

        # record attributes about max accuracy
        self.maxAttr = []

        # start from the original tree; nothing has been cut yet
        bestTree = self.treeroot
        bestPruned = []

        # one handler scores the original tree, the other is re-pointed at
        # each trial tree
        bestvalidat = validation(self.filepath, bestTree)
        validat = validation(self.filepath, None)

        # accuracy of the unpruned tree is the bar every trial has to beat
        maxaccu = bestvalidat.calculateAccuracy()

        for _trial in range(stop):

            # work on a copy so a rejected trial leaves bestTree untouched
            currentTree = copy.deepcopy(bestTree)
            validat.setTreeRoot(currentTree)

            # nodes cut during this trial only
            pruned = []

            for _cut in range(self.nodeNum):

                # refresh self.treeList from the (already partly cut) tree
                # NOTE(review): presumably treeToList fills self.treeList --
                # confirm against its definition
                self.treeToList(currentTree)

                # stop cutting once the tree is down to three nodes or fewer
                if len(self.treeList) <= 3:
                    break

                # index of the node to cut, as chosen by the generator
                j = self.randomNumberGenerator()
                node = self.treeList[j]
                node.isLeaf = 1
                node.left = None
                node.right = None
                pruned.append(node)

            newaccu = validat.calculateAccuracy()
            if newaccu > maxaccu:
                self.maxAccuracy = newaccu
                maxaccu = newaccu
                bestTree = currentTree
                # The accepted tree was copied from the previous best, so it
                # carries the earlier accepted cuts plus this trial's cuts.
                bestPruned = bestPruned + pruned

        # Report only cuts that belong to the returned tree. The previous
        # code used the last trial's list even when that trial was rejected.
        self.maxAttr = [self.tree.attrset[node.attr] for node in bestPruned]

        # reset the original training tree to best tree
        self.tree.tree = bestTree
        self.treeroot = bestTree
Beispiel #3
0
 def printAccuracy(self, testPath, filehandle):
     """Write a post-pruning accuracy report to ``filehandle``.

     Builds a validation handler from ``testPath`` and ``self.treeroot``,
     then prints the number of nodes tried (``self.nodeNum``), the starting
     node number (``self.levelcontrol``), the pruned attributes
     (``self.maxAttr``) and the accuracy formatted as a percentage.

     Args:
         testPath: passed as the first argument to ``validation``.
         filehandle: writable text stream that receives the report.
     """
     tmp = validation(testPath, self.treeroot)

     # Remember the stream that is active now, so an outer redirection is
     # restored rather than being replaced by the interpreter's original
     # stdout.
     previousStdout = sys.stdout
     sys.stdout = filehandle
     try:
         print('\n|-----------------------------------------------------------------------------------------------|')
         print('\n |--number of nodes tried to prune: ', self.nodeNum, '--|--starting node number: ', self.levelcontrol, ' --|')
         print('\n Node selected to prune: ', self.maxAttr)
         print(' The prediction accuracy on given data set after pruning the tree is ', format(tmp.calculateAccuracy(), '5.2%'))
         print('|-----------------------------------------------------------------------------------------------|\n')
     finally:
         # always undo the redirection, even if computing the accuracy fails
         sys.stdout = previousStdout
Beispiel #4
0
    def test_sign(self):
        """A body without a signature yields exactly one error entry."""
        from Validation import validation

        # message under test: valid subject, body with greeting and URL but
        # no closing signature
        subject = r'ТРПО. Лабораторная работа №3'
        body = r'Добрый день, вот лабораторная http://github.com'
        sender = '"Иван Иванович"'

        expected = {
            'Number': '3',
            'URL': ['http://github.com'],
            'errorDescription': ['Отсутствует подпись'],
        }

        self.assertEqual(validation(subject, body, sender), expected)
Beispiel #5
0
    def test_all_positive(self):
        """A well-formed message produces no error entries."""
        from Validation import validation

        # message under test: valid subject, body with greeting, URL and
        # signature
        subject = r'ТРПО. Лабораторная работа №3'
        body = r'Добрый день, вот лабораторная http://github.com -- С уважением, Иван Иванович 18-ИСбо-2а'
        sender = 'Иван Иванович'

        expected = {
            'Number': '3',
            'URL': ['http://github.com'],
            'errorDescription': [],
        }

        self.assertEqual(validation(subject, body, sender), expected)
Beispiel #6
0
    def test_title(self):
        """A bad subject line yields the two subject-related errors."""
        from Validation import validation

        # message under test: body is complete, subject carries neither a
        # lab number nor the discipline
        subject = r'Лаба'
        body = r'Добрый день, вот лабораторная http://github.com -- С уважением, Иван Иванович 18-ИСбо-2а'
        sender = '"Иван Иванович"'

        expected = {
            'Number': '',
            'URL': ['http://github.com'],
            'errorDescription': [
                'Нет номера лабораторной работы',
                'Неверно указана дисциплина',
            ],
        }

        self.assertEqual(validation(subject, body, sender), expected)
Beispiel #7
0
 def printAccuracy(self, testPath, filehandle):
     """Write a post-pruning accuracy report to ``filehandle``.

     Builds a validation handler from ``testPath`` and ``self.treeroot``,
     then prints the number of nodes tried (``self.nodeNum``), the starting
     node number (``self.levelcontrol``), the pruned attributes
     (``self.maxAttr``) and the accuracy formatted as a percentage.

     Args:
         testPath: passed as the first argument to ``validation``.
         filehandle: writable text stream that receives the report.
     """
     tmp = validation(testPath, self.treeroot)

     # Remember the stream that is active now, so an outer redirection is
     # restored rather than being replaced by the interpreter's original
     # stdout.
     previousStdout = sys.stdout
     sys.stdout = filehandle
     try:
         print(
             '\n|-----------------------------------------------------------------------------------------------|'
         )
         print('\n |--number of nodes tried to prune: ', self.nodeNum,
               '--|--starting node number: ', self.levelcontrol, ' --|')
         print('\n Node selected to prune: ', self.maxAttr)
         print(
             ' The prediction accuracy on given data set after pruning the tree is ',
             format(tmp.calculateAccuracy(), '5.2%'))
         print(
             '|-----------------------------------------------------------------------------------------------|\n'
         )
     finally:
         # always undo the redirection, even if computing the accuracy fails
         sys.stdout = previousStdout
Beispiel #8
0
    def test_all_negative(self):
        """A message with a bad subject and bare body reports all four errors."""
        from Validation import validation

        # message under test: subject without number/discipline, body
        # without greeting or signature
        subject = r'Лаба'
        body = r'Вот лабораторная http://github.com'
        sender = '"Иван Иванович"'

        expected_errors = [
            'Отсутствует подпись',
            'Нет приветствия',
            'Нет номера лабораторной работы',
            'Неверно указана дисциплина',
        ]
        expected = {
            'Number': '',
            'URL': ['http://github.com'],
            'errorDescription': expected_errors,
        }

        self.assertEqual(validation(subject, body, sender), expected)
Beispiel #9
0
            if not email:
                send_message(service, USER_ID, email_name, email_name_surname,
                             3, None, None, message_info)
                logger.warning(r"main: Email don't exist in table_valid")
                print(r"main: Email don't exist in table_valid")
            else:
                # Получение группы пользователя
                result = search_group(email)
                group_user = result[0]
                group_name_surname = result[1]
                # Выставление его в журнал, если отсутствует
                result_add_table = add_table(group_user, group_name_surname)[0]

                if result_add_table == 'available' or result_add_table == 'accepted':
                    # Проверка валидации письма
                    valid_dict = validation(message_info['head_of_msg'],
                                            message_info['body_of_msg'])
                    if len(valid_dict["errorDescription"]) > 0:
                        send_message(service, USER_ID, email_name,
                                     email_name_surname, 2, valid_dict,
                                     valid_dict, message_info)
                        logger.warning(
                            r"main: Message failed validation. Email_id :%s" %
                            email_id)
                        print(
                            f"main: Message failed validation. Email_id :{email_id}"
                        )
                    else:
                        # Получение результата из модуля проверки
                        answer = 1  # check_lab(valid_dict['URL'], valid_dict['Number'])['grade']
                        logger.info(
                            r"main: Receiving a response from the verification module. Mark in table :%s"
Beispiel #10
0
def _runTreeExperiment(buildTree, prefix, trainingPath, validationPath,
                       testPath, threshold, pruneNum, printTreeVal):
    """Build one tree, report on it, and optionally prune and report again.

    Args:
        buildTree: callable invoked as ``buildTree(trainingPath, threshold)``
            that returns the tree object to evaluate.
        prefix: string prepended to every output file name ('' or 'rand_').
        trainingPath: path of the training set.
        validationPath: path of the validation set, handed to ``prune``.
        testPath: path of the test set used for every accuracy report.
        threshold: value forwarded to ``buildTree``.
        pruneNum: number of nodes to prune; 0 skips pruning entirely.
        printTreeVal: 1 to also write the pruned tree to its own file.
    """
    outputDir = './Data/output/'

    # build a decision tree and train it
    decisiontree = buildTree(trainingPath, threshold)

    # store the tree structure before pruning
    with open(outputDir + prefix + 'original_tree.txt', 'w') as treeFile:
        decisiontree.printTree(treeFile)

    prunetree = None

    # 'w' starts the report from an empty file on every run; the with-block
    # closes it even when no pruning is requested
    with open(outputDir + prefix + 'accuracy.txt', 'w') as accuracyFile:
        # accuracy on the test data set before pruning
        treeValid = validation(testPath, decisiontree.getTree())
        treeValid.printAccuracy(accuracyFile)

        # print the average depth and total number of nodes
        decisiontree.printLevNod(accuracyFile)

        if pruneNum != 0:
            prunetree = prune(decisiontree, validationPath, pruneNum, 0)
            prunetree.pruneTree()

            prunetree.printAccuracy(testPath, accuracyFile)

            # print the average depth and total number of nodes again, now
            # that pruning has run
            decisiontree.printLevNod(accuracyFile)

    # optionally store the tree after post-pruning
    if prunetree is not None and printTreeVal == 1:
        with open(outputDir + prefix + 'postprune_tree.txt', 'w') as prunedFile:
            prunetree.printTree(prunedFile)


def main():
    """Command-line entry point: evaluate an IG-based and a random-built tree.

    Expects five arguments after the script name: number of nodes to prune,
    training set file, validation set file, test set file, and a flag (1 to
    write the pruned trees). The three data files are resolved relative to
    ``./Data/``; all reports are written under ``./Data/output``.

    Raises:
        SystemExit: when fewer than five arguments are supplied.
        ValueError: when the first or fifth argument is not an integer.
    """
    # check if the command form is correct
    if len(sys.argv) < 6:
        print(" Correct Input Form:\n --python3--|--main.py--|--number of nodes to prune--|--training set path--|--validation set path --|--test set path--|--print or not--|")
        sys.exit("Error: Missing Arguments!\n exited")

    # set home directory as the PATH
    PATH = './Data/'

    # read arguments from the command
    pruneNum = int(sys.argv[1])
    trainingPath = PATH + sys.argv[2]
    validationPath = PATH + sys.argv[3]
    testPath = PATH + sys.argv[4]
    printTreeVal = int(sys.argv[5])

    # set Information entropy threshold to be 0, under which we regard it as a pure node
    threshold = 0

    # create a directory storing output data files
    os.makedirs('./Data/output', exist_ok=True)

    # IG-based tree
    _runTreeExperiment(decisionTree, '', trainingPath, validationPath,
                       testPath, threshold, pruneNum, printTreeVal)

    # tree built by randomly selecting attributes
    _runTreeExperiment(randDecisionTree, 'rand_', trainingPath,
                       validationPath, testPath, threshold, pruneNum,
                       printTreeVal)
Beispiel #11
0
def _runTreeExperiment(buildTree, prefix, trainingPath, validationPath,
                       testPath, threshold, pruneNum, printTreeVal):
    """Build one tree, report on it, and optionally prune and report again.

    Args:
        buildTree: callable invoked as ``buildTree(trainingPath, threshold)``
            that returns the tree object to evaluate.
        prefix: string prepended to every output file name ('' or 'rand_').
        trainingPath: path of the training set.
        validationPath: path of the validation set, handed to ``prune``.
        testPath: path of the test set used for every accuracy report.
        threshold: value forwarded to ``buildTree``.
        pruneNum: number of nodes to prune; 0 skips pruning entirely.
        printTreeVal: 1 to also write the pruned tree to its own file.
    """
    outputDir = './Data/output/'

    # build a decision tree and train it
    decisiontree = buildTree(trainingPath, threshold)

    # store the tree structure before pruning
    with open(outputDir + prefix + 'original_tree.txt', 'w') as treeFile:
        decisiontree.printTree(treeFile)

    prunetree = None

    # 'w' starts the report from an empty file on every run; the with-block
    # closes it even when no pruning is requested
    with open(outputDir + prefix + 'accuracy.txt', 'w') as accuracyFile:
        # accuracy on the test data set before pruning
        treeValid = validation(testPath, decisiontree.getTree())
        treeValid.printAccuracy(accuracyFile)

        # print the average depth and total number of nodes
        decisiontree.printLevNod(accuracyFile)

        if pruneNum != 0:
            prunetree = prune(decisiontree, validationPath, pruneNum, 0)
            prunetree.pruneTree()

            prunetree.printAccuracy(testPath, accuracyFile)

            # print the average depth and total number of nodes again, now
            # that pruning has run
            decisiontree.printLevNod(accuracyFile)

    # optionally store the tree after post-pruning
    if prunetree is not None and printTreeVal == 1:
        with open(outputDir + prefix + 'postprune_tree.txt', 'w') as prunedFile:
            prunetree.printTree(prunedFile)


def main():
    """Command-line entry point: evaluate an IG-based and a random-built tree.

    Expects five arguments after the script name: number of nodes to prune,
    training set file, validation set file, test set file, and a flag (1 to
    write the pruned trees). The three data files are resolved relative to
    ``./Data/``; all reports are written under ``./Data/output``.

    Raises:
        SystemExit: when fewer than five arguments are supplied.
        ValueError: when the first or fifth argument is not an integer.
    """
    # check if the command form is correct
    if len(sys.argv) < 6:
        print(
            " Correct Input Form:\n --python3--|--main.py--|--number of nodes to prune--|--training set path--|--validation set path --|--test set path--|--print or not--|"
        )
        sys.exit("Error: Missing Arguments!\n exited")

    # set home directory as the PATH
    PATH = './Data/'

    # read arguments from the command
    pruneNum = int(sys.argv[1])
    trainingPath = PATH + sys.argv[2]
    validationPath = PATH + sys.argv[3]
    testPath = PATH + sys.argv[4]
    printTreeVal = int(sys.argv[5])

    # set Information entropy threshold to be 0, under which we regard it as a pure node
    threshold = 0

    # create a directory storing output data files
    os.makedirs('./Data/output', exist_ok=True)

    # IG-based tree
    _runTreeExperiment(decisionTree, '', trainingPath, validationPath,
                       testPath, threshold, pruneNum, printTreeVal)

    # tree built by randomly selecting attributes
    _runTreeExperiment(randDecisionTree, 'rand_', trainingPath,
                       validationPath, testPath, threshold, pruneNum,
                       printTreeVal)