def setUp():
    '''
        Build the three option lists: target columns, classifiers and program names.

        Returns a tuple (possibleTargetColumns, possibleClassifiers, possiblePrograms) where
        the target columns are the fixed pair 'MINIMAL' / 'EQUIVALENT', the classifiers come
        from getPossibleClassifiers() and the program names are util.getPathName() applied
        to every entry returned by util.getPrograms().
    '''
    targetColumns = ['MINIMAL', 'EQUIVALENT']
    classifiers = getPossibleClassifiers()

    # One name per entry returned by util.getPrograms(), in the same order
    programNames = []
    for programPath in util.getPrograms():
        programNames.append(util.getPathName(programPath))

    return targetColumns, classifiers, programNames
# Both checks below abort the run because of a usage error, so the process
# exits with a non-zero status: calling scripts can then tell the failure
# apart from a successful execution.
if executionMode is None:
    print(
        'Please specify the execution mode trought --mode parameter. 1 For run and analyze | 2 For run | 3 For analyze'
    )
    print('##### Exit #####')
    sys.exit(1)

if allPrograms == False and program is None:
    print(
        'Please specify the program to be executed through --program parameter or execute all through --allPrograms parameter'
    )
    print('##### Exit #####')
    sys.exit(1)

# Base directory where the programs are expected to be found
baseExperimentFolder = "{}/Programs".format(os.getcwd())

# Either every folder found under the base directory or only the requested program
if allPrograms:
    programsFolder = util.getPrograms(baseExperimentFolder)
else:
    programsFolder = ['{}/{}'.format(baseExperimentFolder, program)]

for subFolder in programsFolder:
    sourceProgram = '{}.c'.format(util.getPathName(subFolder))

    print('### BEGIN ###')
    print('##########\t Executing ' + sourceProgram + '\t ' +
          util.formatNow() + '\t ##########')

    # Run the experiment for the selected folder
    genMutants.main(baseExperimentFolder, subFolder, executionMode)
def classify_main(arguments):
    '''
        Function responsible for receiving a mutant dataset and classifying those mutants as minimal, equivalent or traditional.

        arguments is a command-line style list; position 0 is ignored and the remaining
        positions are scanned for the following flags:
            --column <name>      target column to classify
            --allColumns         use every possible target column
            --program <name>     program to classify
            --allPrograms        use every possible program
            --classifier <name>  classifier to use
            --bestClassifier     use the classifier returned by analyzes.getBestClassifierForPrograms() for each program/column
            --allClassifiers     use every possible classifier
            --parameter <int>    parsed as an integer
            --allParameters      use every value returned by getPossibleParameters() for each classifier

        When the selection is incomplete, or a flag that needs a value is the last argument,
        a message is printed and the function returns without classifying anything.
        Raises ValueError when the value of --parameter is not an integer.
    '''
    # Possible parameters
    possibleTargetColumns = getPossibleTargetColumns()
    possibleClassifiers = getPossibleClassifiers()
    possiblePrograms = [
        util.getPathName(program)
        for program in util.getPrograms('{}/Programs'.format(os.getcwd()))
    ]

    # Parameters
    targetColumn = None
    allTargetColumns = False
    programToClassify = None
    classifier = None
    # NOTE(review): --parameter is parsed (and validated as an integer) but the value is not
    # used anywhere below - confirm whether it was meant to override the default parameters.
    algorithmParameter = None
    executeAllPrograms = False
    executeBestClassifierForProgram = False
    programsBestClassifiers = None
    executeAllClassifiers = False
    executeAllParameters = False

    # Flags that consume the argument right after them
    flagsWithValue = ('--column', '--program', '--classifier', '--parameter')

    # Walk through all parameters
    for position in range(1, len(arguments)):
        arg = arguments[position]

        if arg in flagsWithValue:
            # A trailing flag has no value to read; report it instead of failing with IndexError
            if position + 1 >= len(arguments):
                print('Please specify a value after the {} parameter'.format(arg))
                return

            value = arguments[position + 1]
            if arg == '--column':
                targetColumn = value
            elif arg == '--program':
                programToClassify = value
            elif arg == '--classifier':
                classifier = value
            else:
                algorithmParameter = int(value)
        elif arg == '--allColumns':
            allTargetColumns = True
        elif arg == '--allPrograms':
            executeAllPrograms = True
        elif arg == '--bestClassifier':
            executeBestClassifierForProgram = True
            programsBestClassifiers = analyzes.getBestClassifierForPrograms()
        elif arg == '--allClassifiers':
            executeAllClassifiers = True
        elif arg == '--allParameters':
            executeAllParameters = True

    withoutProgramMessage = 'Please specify the program correctly. The {program} could be ' + str(
        possiblePrograms)
    withoutColumnMessage = 'Please specify the target column throught --column {targetColumn}. The {targetColumn} could be ' + str(
        possibleTargetColumns)
    withoutClassifierMessage = 'Please specify the classifier to be used throught --classifier {classifier}. The {classifier} could be ' + str(
        possibleClassifiers)

    # Collect every problem so that all of them are reported at once
    errors = []
    if not allTargetColumns and (targetColumn is None or
                                 targetColumn not in possibleTargetColumns):
        errors.append(withoutColumnMessage)

    if programToClassify is None and not executeAllPrograms:
        errors.append(withoutProgramMessage)

    if classifier is None and not executeBestClassifierForProgram and not executeAllClassifiers:
        errors.append(withoutClassifierMessage)

    if errors:
        print(''.join('{}\n'.format(message) for message in errors))
        return

    if executeAllPrograms:
        programsToBeClassified = possiblePrograms.copy()
    else:
        programsToBeClassified = [programToClassify]

    if allTargetColumns:
        targetColumns = possibleTargetColumns.copy()
    else:
        targetColumns = [targetColumn]

    # Classifiers that take no parameter; they get a single empty placeholder
    parameterlessClassifiers = ('SVM', 'LDA', 'LR', 'GNB')

    # Default parameter of each (classifier, target column) pair
    defaultParameters = {
        ('KNN', 'MINIMAL'): 1,
        ('KNN', 'EQUIVALENT'): 11,
        ('DT', 'MINIMAL'): 15,
        ('DT', 'EQUIVALENT'): 35,
        ('RF', 'MINIMAL'): 5,
        ('RF', 'EQUIVALENT'): 15,
    }

    for column in targetColumns:
        for program in programsToBeClassified:
            if executeBestClassifierForProgram:
                classifier, _ = programsBestClassifiers['{}_{}'.format(
                    program, column)]

            classifiers = possibleClassifiers if executeAllClassifiers else [
                classifier
            ]

            for _classifier in classifiers:
                if executeAllParameters:
                    parameters = getPossibleParameters(_classifier)
                elif _classifier in parameterlessClassifiers:
                    parameters = ['']
                elif (_classifier, column) in defaultParameters:
                    parameters = [defaultParameters[(_classifier, column)]]
                else:
                    # Without this branch the parameter list would be undefined, or silently
                    # reused from the previous classifier
                    print(
                        'No default parameter for classifier {} on column {}. Skipping'
                        .format(_classifier, column))
                    continue

                for parameter in parameters:
                    # Suffix that keeps the result files of different runs apart
                    complementClassifierName = '_{}'.format(
                        _classifier) if executeAllClassifiers else ''
                    if executeAllParameters:
                        complementClassifierName = '{}_{}'.format(
                            complementClassifierName, parameter)

                    dataSetFileName = '{}/ML/Dataset/{}/Programs/{}.csv'.format(
                        os.getcwd(), column, program)
                    resultDataSetFileName = '{baseFolder}/ML/Results/{targetColumn}/Classification/{programName}{complement}.csv'.format(
                        baseFolder=os.getcwd(),
                        targetColumn=column,
                        programName=program,
                        complement=complementClassifierName)

                    print(
                        '\nProgram: {} | Column: {} | Classifier: {} | Parameter: {}'
                        .format(program, column, _classifier, parameter))

                    # NOTE(review): the empty placeholder used for parameterless classifiers
                    # never reaches classify() - confirm this is intended.
                    if parameter != '':
                        classify(dataSetFileName, resultDataSetFileName,
                                 column, _classifier, parameter, program)
def debug_main(arguments):
    '''
        Main function performed at the time of running the experiment

        arguments is a command-line style list; position 0 is ignored and the remaining
        positions are scanned for the following flags:
            --column <name>      target column
            --classifier <name>  classifier to use
            --program <name>     program to restrict the execution to
            --pbp                run the cross validation once per possible program
            --best               use the value returned by bestParameter(targetColumn, classifier)

        When the first argument is --all or --allPbP, executeAll() or executeAllEachProgram()
        is called and the function returns. When the selection is invalid, or a flag that
        needs a value is the last argument, a message is printed and the function returns.
    '''
    # Possible parameters
    possibleTargetColumns = getPossibleTargetColumns()
    possibleClassifiers = getPossibleClassifiers()
    possiblePrograms = [
        util.getPathName(program)
        for program in util.getPrograms('{}/Programs'.format(os.getcwd()))
    ]

    # Parameters
    targetColumn = None
    classifier = None
    columnsToDrop = []
    columnsToAdd = []
    program = None
    programByProgram = False
    executeWithBestParameter = False

    # Flags that consume the argument right after them
    flagsWithValue = ('--column', '--classifier', '--program')

    # Walk through all parameters
    for position in range(1, len(arguments)):
        arg = arguments[position]

        if arg in flagsWithValue:
            # A trailing flag has no value to read; report it instead of failing with IndexError
            if position + 1 >= len(arguments):
                print('Please specify a value after the {} parameter'.format(arg))
                return

            value = arguments[position + 1]
            if arg == '--column':
                targetColumn = value
            elif arg == '--classifier':
                classifier = value
            else:
                program = value
        elif arg == '--pbp':
            programByProgram = True
        elif arg == '--best':
            executeWithBestParameter = True

    # Set the best parameter if it is necessary
    if executeWithBestParameter:
        parameter = bestParameter(targetColumn, classifier)
    else:
        parameter = None

    if len(arguments) > 1:
        firstArgument = arguments[1]

        if firstArgument == '--all':
            # Execute all classifiers with all classifications
            executeAll(possibleTargetColumns,
                       possibleClassifiers,
                       parameter,
                       executeWithBestParameter=executeWithBestParameter)
            return

        if firstArgument == '--allPbP':
            # Execute all, but program by program
            executeAllEachProgram(possibleTargetColumns, possibleClassifiers,
                                  possiblePrograms, executeWithBestParameter)
            return

    withoutColumnMessage = 'Please specify the target column throught --column {targetColumn}. The {targetColumn} could be ' + str(
        possibleTargetColumns)
    withoutClassifierMessage = 'Please specify the classifier throught --classifier {classifier}. The {classifier} could be ' + str(
        possibleClassifiers)
    withoutProgramMessage = 'Please specify the program correctly. The {program} could be ' + str(
        possiblePrograms)

    # Collect every problem so that all of them are reported at once
    errors = []
    if targetColumn is None or targetColumn not in possibleTargetColumns:
        errors.append(withoutColumnMessage)

    if classifier is None:
        errors.append(withoutClassifierMessage)

    # The program is optional, but when informed it has to be a known one
    if program is not None and program not in possiblePrograms:
        errors.append(withoutProgramMessage)

    if errors:
        print(''.join('{}\n'.format(message) for message in errors))
        return

    # Execute cross validation
    if not programByProgram:
        crossValidation(targetColumn,
                        classifier,
                        program,
                        columnsToDrop,
                        columnsToAdd,
                        parameter=parameter)
    else:
        for specifiedProgram in possiblePrograms:
            crossValidation(targetColumn,
                            classifier,
                            specifiedProgram,
                            columnsToDrop,
                            columnsToAdd,
                            parameter=parameter)
def getPossiblePrograms():
    '''
        Return util.getPathName() of every entry that util.getPrograms() reports for the
        'Programs' folder under the current working directory, in the same order.
    '''
    programsRoot = '{}/Programs'.format(os.getcwd())

    programNames = []
    for programPath in util.getPrograms(programsRoot):
        programNames.append(util.getPathName(programPath))

    return programNames
def analyzeClassificationsFromEachProgram(targetColumn,
                                          possiblePrograms,
                                          bestProgram_Classifier,
                                          overwrite=False):
    '''
        Function responsible for analyzing the results of the classification of the mutants
        of the programs and obtaining the metrics for each program.

        targetColumn: name of the target column; it selects the results folder
            ML/Results/<targetColumn>/Classification under the current working directory.
        possiblePrograms: program names to consider; result files of other names are ignored.
        bestProgram_Classifier: DataFrame queried by its 'Column' and 'Program' columns to
            read the 'Classifier' and 'Parameter' of each program.
        overwrite: when False and ML_Metrics.csv already exists in the results folder, the
            metrics are read from that file instead of being computed again.

        Returns a DataFrame with the columns ProgramName, Column, Classifier, Parameter,
        Accuracy, Precision, Recall and F1 (the four metrics multiplied by 100), sorted by
        Column and by the upper-cased program name. When no result file belongs to a
        possible program, an empty DataFrame with those columns is returned and nothing is
        written.

        Raises IndexError when bestProgram_Classifier has no row for a program found in the
        results folder.
    '''
    baseFolder = '{}/ML/Results/{}/Classification'.format(
        os.getcwd(), targetColumn)

    # TODO: the '{}/Metrics_AllClassifiers.csv'.format(baseFolder) file still has to be created here
    fileName = 'ML_Metrics'
    metricsFile = '{}/{}.csv'.format(baseFolder, fileName)

    metricsColumns = [
        'ProgramName', 'Column', 'Classifier', 'Parameter', 'Accuracy',
        'Precision', 'Recall', 'F1'
    ]

    # Reuse the metrics computed by a previous execution
    if not overwrite and util.pathExists(metricsFile):
        return _sortMetricsByColumnAndProgram(
            util.createDataFrameFromCSV(metricsFile, True))

    programsMetrics = []
    for filePath in util.getFilesInFolder(baseFolder):
        # Text before the first dot; a name without any dot is kept whole
        programName = util.getPathName(filePath).split('.', 1)[0]
        if programName not in possiblePrograms:
            continue

        programInfo_ClassifierParameter = bestProgram_Classifier.query(
            "Column == '{}' and Program == '{}'".format(
                targetColumn, programName))
        classifier = programInfo_ClassifierParameter['Classifier'].values[0]
        parameter = programInfo_ClassifierParameter['Parameter'].values[0]

        accuracy, precision, recall, f1 = getMLMetricsFromClassificationFile(
            filePath, targetColumn, programName)

        programsMetrics.append(
            pd.DataFrame(data=[[
                programName, targetColumn, classifier, parameter,
                accuracy * 100, precision * 100, recall * 100, f1 * 100
            ]],
                         columns=metricsColumns))

    # pd.concat() rejects an empty list, and there is nothing to sort or to write either
    if not programsMetrics:
        return pd.DataFrame(columns=metricsColumns)

    # A single concatenation instead of one DataFrame.append() per program
    mutantsMetrics = _sortMetricsByColumnAndProgram(pd.concat(programsMetrics))
    util.writeDataFrameInCsvFile(metricsFile, mutantsMetrics)

    return mutantsMetrics


def _sortMetricsByColumnAndProgram(mutantsMetrics):
    ''' Return the metrics sorted by 'Column' and by the upper-cased 'ProgramName' '''
    # Temporary column holding the upper-cased name used as the second sort key
    sortKey = 'ProgramName.UPPER'
    withSortKey = mutantsMetrics.assign(
        **{sortKey: mutantsMetrics['ProgramName'].str.upper()})

    return withSortKey.sort_values(by=['Column', sortKey]).drop(
        columns=sortKey)
def getMetricsFromPrograms(possibleTargetColumns,
                           possibleClassifiers,
                           programsInfo,
                           writeMetrics=False,
                           bestParameter=False):
    '''
        Fill the programs info with the F1 score and sample split read from the result
        files of each program and classifier.

        possibleTargetColumns, possibleClassifiers: not used by this function; the target
            columns (MINIMAL, EQUIVALENT) and the classifiers are fixed below.
        programsInfo: dictionary turned into a DataFrame with one row per key; its values
            follow getProgramsHeader() without the 'Program' entry.
        writeMetrics: when True, the resulting table is also written to
            Programs/ProgramsInfo.csv under the current working directory.
        bestParameter: when True, the '_bestParameter' suffix is added to the name of the
            result files that are looked up.

        Returns the DataFrame with the updated metrics.
        Raises KeyError when a program found in the Programs folder is not in programsInfo.
    '''
    fileFilter = '_bestParameter' if bestParameter else ''

    programs = [
        util.getPathName(program)
        for program in util.getPrograms('{}/Programs'.format(os.getcwd()))
    ]

    # Positions inside ML/Results/[COLUMN]/Programs/[ProgramName]_[Classifier].csv
    i_program_Max = 1  # Index of max score row
    i_program_First = 6  # Index of the first occurence row (useful when it is using _bestParameterFile)
    i_program_SampleSplit = 0  # Index of sample split column
    i_program_F1 = 4  # Index of F1 Score column

    programsHeader = getProgramsHeader()
    columnsHeader = programsHeader.copy()
    columnsHeader.remove('Program')

    df_programsInfo = pd.DataFrame.from_dict(programsInfo,
                                             orient='index',
                                             columns=columnsHeader)

    # Column labels on the programs info are built as [PREFIX]_[CLASSIFIER]_F1 and, only for
    # the classifiers listed in classifiersWithSampleSplit, [PREFIX]_[CLASSIFIER]_SampleSplit
    columnPrefixes = (('MINIMAL', 'MM'), ('EQUIVALENT', 'EM'))
    classifiers = ('RF', 'DT', 'KNN', 'SVM', 'LDA', 'LR', 'GNB')
    classifiersWithSampleSplit = ('RF', 'DT', 'KNN')

    for program in programs:
        # Split every result file in lines and columns (;)
        resultFiles = {}
        for targetColumn, prefix in columnPrefixes:
            fileName = '{}/ML/Results/{}/Programs/{}_[CLASSIFIER]{}.csv'.format(
                os.getcwd(), targetColumn, program, fileFilter)

            for classifier in classifiers:
                resultFiles[(prefix, classifier)] = util.splitFileInColumns(
                    bestParameterFileExists(
                        fileName.replace('[CLASSIFIER]', classifier)), ';')

        # Assigning through .loc with a new row label would add the row silently
        if program not in df_programsInfo.index:
            raise KeyError(program)

        # Update the metrics of programs info. A single .loc[row, column] call writes into
        # the frame itself; chaining .loc[row][column] may write to a temporary copy.
        for (prefix, classifier), fileContent in resultFiles.items():
            df_programsInfo.loc[program, '{}_{}_F1'.format(
                prefix, classifier)] = fileContent[i_program_Max][i_program_F1]

            if classifier in classifiersWithSampleSplit:
                df_programsInfo.loc[program, '{}_{}_SampleSplit'.format(
                    prefix, classifier)] = fileContent[i_program_First][
                        i_program_SampleSplit]

    if writeMetrics:
        # Writing program info: the header followed by one line per program
        programsInfoFileName = '{}/Programs/ProgramsInfo.csv'.format(
            os.getcwd())

        data = [programsHeader]
        for index, values in df_programsInfo.iterrows():
            data.append([index] + list(values.values))

        util.writeInCsvFile(programsInfoFileName, data, delimiter=',')

    return df_programsInfo