Ejemplo n.º 1
0
def setUp():
    """Build the option lists shared by the callers of this module.

    Returns:
        A 3-tuple ``(target columns, classifiers, program names)``. The target
        columns are the fixed labels ``'MINIMAL'`` and ``'EQUIVALENT'``; the
        classifiers come from ``getPossibleClassifiers()``; the program names
        are ``util.getPathName`` applied to every entry returned by
        ``util.getPrograms()``.
    """
    targetColumns = ['MINIMAL', 'EQUIVALENT']
    classifiers = getPossibleClassifiers()

    programNames = []
    for programPath in util.getPrograms():
        programNames.append(util.getPathName(programPath))

    return targetColumns, classifiers, programNames
Ejemplo n.º 2
0
    # Validate the command-line options, then run the experiment for every
    # selected program folder.
    # NOTE(review): sys.exit() without an argument exits with status 0, so a
    # calling shell cannot tell these early exits apart from success.
    if executionMode is None:
        print(
            'Please specify the execution mode trought --mode parameter. 1 For run and analyze | 2 For run | 3 For analyze'
        )
        print('##### Exit #####')
        sys.exit()

    if allPrograms == False and program is None:
        print(
            'Please specify the program to be executed through --program parameter or execute all through --allPrograms parameter'
        )
        print('##### Exit #####')
        sys.exit()

    # Set the base directory where the programs are expected to be
    baseExperimentFolder = "{}/Programs".format(os.getcwd())

    # Use every program folder reported by util.getPrograms for the base
    # directory, or only the folder of the requested program
    programsFolder = util.getPrograms(
        baseExperimentFolder) if allPrograms else [
            '{}/{}'.format(baseExperimentFolder, program)
        ]
    for subFolder in programsFolder:

        # The source file name is derived from the folder's path name plus '.c'
        sourceProgram = '{}.c'.format(util.getPathName(subFolder))
        print('### BEGIN ###')
        print('##########\t   Executing ' + sourceProgram + '\t ' +
              util.formatNow() + '\t   ##########')

        # Run the experiment, passing the selected folder as a parameter
        genMutants.main(baseExperimentFolder, subFolder, executionMode)
Ejemplo n.º 3
0
def classify_main(arguments):
    """Classify the mutants of one or more programs from command-line tokens.

    Receives a mutant dataset selection and classifies those mutants for the
    requested target column(s), program(s) and classifier(s).

    Args:
        arguments: Sequence of command-line tokens. Index 0 is skipped (the
            scan starts at index 1). Recognised flags:
            ``--column <name>`` / ``--allColumns``,
            ``--program <name>`` / ``--allPrograms``,
            ``--classifier <name>`` / ``--bestClassifier`` /
            ``--allClassifiers``,
            ``--parameter <int>`` and ``--allParameters``.

    Returns:
        None. When a mandatory option is missing (or a value-taking flag is
        the last token and therefore has no value) the corresponding help
        message is printed and the function returns without classifying.
    """

    def flagValue(index):
        """Return the token after the flag at *index*, or None if absent."""
        return arguments[index + 1] if index + 1 < len(arguments) else None

    # Possible parameters
    possibleTargetColumns = getPossibleTargetColumns()
    possibleClassifiers = getPossibleClassifiers()
    possiblePrograms = [
        util.getPathName(program)
        for program in util.getPrograms('{}/Programs'.format(os.getcwd()))
    ]

    # Classifiers that take no tunable parameter, and the default parameter
    # used for the remaining (classifier, column) pairs when --allParameters
    # is not given.
    parameterlessClassifiers = ('SVM', 'LDA', 'LR', 'GNB')
    defaultParameters = {
        ('KNN', 'MINIMAL'): 1,
        ('KNN', 'EQUIVALENT'): 11,
        ('DT', 'MINIMAL'): 15,
        ('DT', 'EQUIVALENT'): 35,
        ('RF', 'MINIMAL'): 5,
        ('RF', 'EQUIVALENT'): 15,
    }

    # Parameters
    targetColumn = None
    allTargetColumns = False
    programToClassify = None
    classifier = None
    # NOTE(review): --parameter is parsed but not used by the rest of this
    # function; confirm whether it was meant to override the defaults below.
    algorithmParameter = None
    executeAllPrograms = False
    executeBestClassifierForProgram = False
    programsBestClassifiers = None
    executeAllClassifiers = False
    executeAllParameters = False

    # Walk through all parameters. A value-taking flag given as the last
    # token yields None instead of raising IndexError, so the validation
    # below can report it.
    for iCount in range(1, len(arguments)):
        arg = arguments[iCount]
        if arg == '--column':
            targetColumn = flagValue(iCount)
        elif arg == '--allColumns':
            allTargetColumns = True
        elif arg == '--program':
            programToClassify = flagValue(iCount)
        elif arg == '--allPrograms':
            executeAllPrograms = True
        elif arg == '--classifier':
            classifier = flagValue(iCount)
        elif arg == '--bestClassifier':
            executeBestClassifierForProgram = True
            programsBestClassifiers = analyzes.getBestClassifierForPrograms()
        elif arg == '--allClassifiers':
            executeAllClassifiers = True
        elif arg == '--parameter':
            rawParameter = flagValue(iCount)
            if rawParameter is not None:
                algorithmParameter = int(rawParameter)
        elif arg == '--allParameters':
            executeAllParameters = True

    withoutProgramMessage = 'Please specify the program correctly. The {program} could be ' + str(
        possiblePrograms)
    withoutColumnMessage = 'Please specify the target column throught --column {targetColumn}. The {targetColumn} could be ' + str(
        possibleTargetColumns)
    withoutClassifierMessage = 'Please specify the classifier to be used throught --classifier {classifier}. The {classifier} could be ' + str(
        possibleClassifiers)
    errorMessage = ''

    if not allTargetColumns and (targetColumn is None or
                                 targetColumn not in possibleTargetColumns):
        errorMessage = '{}{}\n'.format(errorMessage, withoutColumnMessage)

    if programToClassify is None and not executeAllPrograms:
        errorMessage = '{}{}\n'.format(errorMessage, withoutProgramMessage)

    if classifier is None and not executeBestClassifierForProgram and not executeAllClassifiers:
        errorMessage = '{}{}\n'.format(errorMessage, withoutClassifierMessage)

    if errorMessage:
        print(errorMessage)
        return

    if executeAllPrograms:
        programsToBeClassified = possiblePrograms.copy()
    else:
        programsToBeClassified = [programToClassify]

    if allTargetColumns:
        targetColumns = possibleTargetColumns.copy()
    else:
        targetColumns = [targetColumn]

    for column in targetColumns:
        for program in programsToBeClassified:
            if executeBestClassifierForProgram:
                # Entries are keyed as '<program>_<column>'; only the first
                # element of the stored pair is used here.
                classifier, _ = programsBestClassifiers['{}_{}'.format(
                    program, column)]

            if executeAllClassifiers:
                classifiers = possibleClassifiers
            else:
                classifiers = [classifier]

            for _classifier in classifiers:

                if executeAllParameters:
                    parameters = getPossibleParameters(_classifier)
                elif _classifier in parameterlessClassifiers:
                    parameters = ['']
                elif (_classifier, column) in defaultParameters:
                    parameters = [defaultParameters[(_classifier, column)]]
                else:
                    # Previously this case left `parameters` unbound (or
                    # silently reused the list from the previous iteration).
                    print(
                        'No default parameter known for classifier {} and column {}. Skipping.'
                        .format(_classifier, column))
                    continue

                for parameter in parameters:
                    # The result file name gets a '_<classifier>' and/or
                    # '_<parameter>' suffix only in the corresponding
                    # "execute all" modes.
                    complementClassifierName = '_{}'.format(
                        _classifier) if executeAllClassifiers else ''
                    complementClassifierName = '{baseName}{parameter}'.format(
                        baseName=complementClassifierName,
                        parameter='_{}'.format(parameter)
                        if executeAllParameters else '')
                    dataSetFileName = '{}/ML/Dataset/{}/Programs/{}.csv'.format(
                        os.getcwd(), column, program)
                    resultDataSetFileName = '{baseFolder}/ML/Results/{targetColumn}/Classification/{programName}{complement}.csv'.format(
                        baseFolder=os.getcwd(),
                        targetColumn=column,
                        programName=program,
                        complement=complementClassifierName)

                    print(
                        '\nProgram: {} | Column: {} | Classifier: {} | Parameter: {}'
                        .format(program, column, _classifier, parameter))
                    # NOTE(review): an empty parameter skips classify(), so
                    # SVM/LDA/LR/GNB are only printed in default-parameter
                    # mode - confirm this is intended.
                    if parameter != '':
                        classify(dataSetFileName, resultDataSetFileName,
                                 column, _classifier, parameter, program)
Ejemplo n.º 4
0
def debug_main(arguments):
    """Main function performed at the time of running the experiment.

    Args:
        arguments: Sequence of command-line tokens. Index 0 is skipped (the
            scan starts at index 1). Recognised flags: ``--column <name>``,
            ``--classifier <name>``, ``--program <name>``, ``--pbp`` (run the
            cross validation once per known program) and ``--best`` (use the
            value returned by ``bestParameter``). When the token at index 1
            is ``--all`` or ``--allPbP`` the call is delegated to
            ``executeAll`` / ``executeAllEachProgram`` respectively.

    Returns:
        None. When the column, classifier or program is missing or invalid
        (including a value-taking flag given as the last token) the help
        messages are printed and nothing is executed.
    """

    def flagValue(index):
        """Return the token after the flag at *index*, or None if absent."""
        return arguments[index + 1] if index + 1 < len(arguments) else None

    # Possible parameters
    possibleTargetColumns = getPossibleTargetColumns()
    possibleClassifiers = getPossibleClassifiers()
    possiblePrograms = [
        util.getPathName(program)
        for program in util.getPrograms('{}/Programs'.format(os.getcwd()))
    ]

    # Parameters
    targetColumn = None
    classifier = None
    columnsToDrop = []
    columnsToAdd = []
    program = None
    programByProgram = False
    executeWithBestParameter = False

    # Walk through all parameters. A value-taking flag given as the last
    # token yields None instead of raising IndexError, so the validation
    # below can report it.
    for iCount in range(1, len(arguments)):
        arg = arguments[iCount]
        if arg == '--column':
            targetColumn = flagValue(iCount)
        elif arg == '--classifier':
            classifier = flagValue(iCount)
        elif arg == '--program':
            program = flagValue(iCount)
        elif arg == '--pbp':
            programByProgram = True
        elif arg == '--best':
            executeWithBestParameter = True

    # Set the best parameter if it is necessary
    parameter = bestParameter(targetColumn,
                              classifier) if executeWithBestParameter else None

    if len(arguments) > 1:
        firstArgument = arguments[1]
        if firstArgument == '--all':
            # Execute all classifiers with all classifications
            executeAll(possibleTargetColumns,
                       possibleClassifiers,
                       parameter,
                       executeWithBestParameter=executeWithBestParameter)
            return
        elif firstArgument == '--allPbP':
            # Execute everything, but program by program
            executeAllEachProgram(possibleTargetColumns, possibleClassifiers,
                                  possiblePrograms, executeWithBestParameter)
            return

    withoutColumnMessage = 'Please specify the target column throught --column {targetColumn}. The {targetColumn} could be ' + str(
        possibleTargetColumns)
    withoutClassifierMessage = 'Please specify the classifier throught --classifier {classifier}. The {classifier} could be ' + str(
        possibleClassifiers)
    withoutProgramMessage = 'Please specify the program correctly. The {program} could be ' + str(
        possiblePrograms)
    errorMessage = ''
    if targetColumn is None or targetColumn not in possibleTargetColumns:
        errorMessage = '{}{}\n'.format(errorMessage, withoutColumnMessage)

    if classifier is None:
        errorMessage = '{}{}\n'.format(errorMessage, withoutClassifierMessage)

    # The program is optional, but when given it must be a known one
    if program is not None and program not in possiblePrograms:
        errorMessage = '{}{}\n'.format(errorMessage, withoutProgramMessage)

    if errorMessage:
        print(errorMessage)
        return

    # Execute cross validation
    if not programByProgram:
        crossValidation(targetColumn,
                        classifier,
                        program,
                        columnsToDrop,
                        columnsToAdd,
                        parameter=parameter)
    else:
        for specifiedProgram in possiblePrograms:
            crossValidation(targetColumn,
                            classifier,
                            specifiedProgram,
                            columnsToDrop,
                            columnsToAdd,
                            parameter=parameter)
Ejemplo n.º 5
0
def getPossiblePrograms():
    """Return the path name of every program under ``<cwd>/Programs``.

    The folder is resolved against the current working directory, its
    entries are listed with ``util.getPrograms`` and each one is converted
    with ``util.getPathName``.
    """
    programsFolder = '{}/Programs'.format(os.getcwd())

    programNames = []
    for programPath in util.getPrograms(programsFolder):
        programNames.append(util.getPathName(programPath))
    return programNames
Ejemplo n.º 6
0
def analyzeClassificationsFromEachProgram(targetColumn,
                                          possiblePrograms,
                                          bestProgram_Classifier,
                                          overwrite=False):
    """Collect the ML metrics of the classification results of each program.

    Function responsible for analyzing the results of the classifications of
    the programs' mutants and obtaining the metrics for each program.

    Args:
        targetColumn: Target column name; selects the
            ``<cwd>/ML/Results/<targetColumn>/Classification`` folder.
        possiblePrograms: Program names to consider; result files whose name
            (up to the first ``.``) is not in this collection are ignored.
        bestProgram_Classifier: DataFrame with at least the columns
            ``Column``, ``Program``, ``Classifier`` and ``Parameter``, used
            to look up the classifier/parameter recorded for each program.
        overwrite: When True the metrics are recomputed even if the cached
            ``ML_Metrics.csv`` file already exists.

    Returns:
        A DataFrame with the columns ``ProgramName``, ``Column``,
        ``Classifier``, ``Parameter``, ``Accuracy``, ``Precision``,
        ``Recall`` and ``F1`` (metrics multiplied by 100), sorted by column
        and case-insensitive program name. When no result file matches a
        known program an empty DataFrame with those columns is returned and
        nothing is written.
    """
    metricsColumns = [
        'ProgramName', 'Column', 'Classifier', 'Parameter', 'Accuracy',
        'Precision', 'Recall', 'F1'
    ]

    def sortByColumnAndProgram(metrics):
        """Sort by column, then by program name ignoring letter case."""
        metrics['ProgramName.UPPER'] = metrics["ProgramName"].str.upper()
        metrics = metrics.sort_values(by=['Column', 'ProgramName.UPPER'])
        del metrics['ProgramName.UPPER']
        return metrics

    baseFolder = '{}/ML/Results/{}/Classification'.format(
        os.getcwd(), targetColumn)
    fileName = 'ML_Metrics'
    # TODO: the file '{}/Metrics_AllClassifiers.csv'.format(baseFolder) still
    # needs to be created here
    metricsFile = '{}/ML/Results/{}/Classification/{}.csv'.format(
        os.getcwd(), targetColumn, fileName)

    # Reuse the cached metrics unless a recomputation was requested
    if not overwrite and util.pathExists(metricsFile):
        return sortByColumnAndProgram(
            util.createDataFrameFromCSV(metricsFile, True))

    collectedMetrics = []
    for resultFile in util.getFilesInFolder(baseFolder):
        # Keep the part before the first '.'; a name without any dot is kept
        # whole (slicing with find() == -1 used to drop its last character).
        programName = util.getPathName(resultFile).split('.', 1)[0]
        if programName not in possiblePrograms:
            continue

        programInfo_ClassifierParameter = bestProgram_Classifier.query(
            'Column == \'{}\' and Program == \'{}\''.format(
                targetColumn, programName))
        classifier = programInfo_ClassifierParameter['Classifier'].values[0]
        parameter = programInfo_ClassifierParameter['Parameter'].values[0]

        accuracy, precision, recall, f1 = getMLMetricsFromClassificationFile(
            resultFile, targetColumn, programName)
        collectedMetrics.append(
            pd.DataFrame(data=[[
                programName, targetColumn, classifier, parameter,
                accuracy * 100, precision * 100, recall * 100, f1 * 100
            ]],
                         columns=metricsColumns))

    if not collectedMetrics:
        # Nothing to sort or cache; previously this crashed with a KeyError
        # on the missing 'ProgramName' column.
        return pd.DataFrame(columns=metricsColumns)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # per-row index the repeated append produced.
    mutantsMetrics = sortByColumnAndProgram(pd.concat(collectedMetrics))
    util.writeDataFrameInCsvFile(metricsFile, mutantsMetrics)

    return mutantsMetrics
Ejemplo n.º 7
0
def getMetricsFromPrograms(possibleTargetColumns,
                           possibleClassifiers,
                           programsInfo,
                           writeMetrics=False,
                           bestParameter=False):
    """Fill the programs-info table with the per-classifier metrics.

    For every program under ``<cwd>/Programs`` and for both target columns
    (MINIMAL and EQUIVALENT), the result file of each classifier is read and
    its F1 score (plus the sample split for RF, DT and KNN) is stored in the
    programs-info DataFrame.

    Args:
        possibleTargetColumns: Not used by this function; kept for interface
            compatibility.
        possibleClassifiers: Not used by this function; kept for interface
            compatibility.
        programsInfo: Dict keyed by program name whose values match the
            programs header (without the ``Program`` entry).
        writeMetrics: When True the resulting table is also written to
            ``<cwd>/Programs/ProgramsInfo.csv``.
        bestParameter: When True the ``_bestParameter`` variant of each
            result file name is requested.

    Returns:
        The updated programs-info DataFrame, indexed by program name.

    Raises:
        KeyError: If a program found on disk has no entry in *programsInfo*.
    """
    fileFilter = '_bestParameter' if bestParameter else ''
    programs = [
        util.getPathName(program)
        for program in util.getPrograms('{}/Programs'.format(os.getcwd()))
    ]

    # Positions inside ML/Results/[COLUMN]/Programs/[ProgramName]_[Classifier].csv
    i_program_Max = 1  # Index of max score row
    i_program_First = 6  # Index of the first occurence row (useful when it is using _bestParameterFile)
    i_program_SampleSplit = 0  # Index of sample split column
    i_program_F1 = 4  # Index of F1 Score column

    # (results folder, label prefix) for each target column. The column
    # labels on the programs info are '<prefix>_<classifier>_F1' for every
    # classifier and '<prefix>_<classifier>_SampleSplit' for RF, DT and KNN.
    targetColumns = (('MINIMAL', 'MM'), ('EQUIVALENT', 'EM'))
    classifiers = ('RF', 'DT', 'KNN', 'SVM', 'LDA', 'LR', 'GNB')
    classifiersWithSampleSplit = ('RF', 'DT', 'KNN')

    programsHeader = getProgramsHeader()

    columnsHeader = programsHeader.copy()
    columnsHeader.remove('Program')
    df_programsInfo = pd.DataFrame.from_dict(programsInfo,
                                             orient='index',
                                             columns=columnsHeader)

    for program in programs:
        # Writing through .loc would silently add a row for an unknown
        # program; keep failing as the previous row lookup did.
        if program not in df_programsInfo.index:
            raise KeyError(program)

        for targetColumn, prefix in targetColumns:
            for classifier in classifiers:
                fileName = '{}/ML/Results/{}/Programs/{}_{}{}.csv'.format(
                    os.getcwd(), targetColumn, program, classifier,
                    fileFilter)

                # Split the file in lines and columns (;)
                fileRows = util.splitFileInColumns(
                    bestParameterFileExists(fileName), ';')

                # Update the metrics of programs info. A single .loc[row, col]
                # assignment is used because chained indexing
                # (df.loc[row][col] = ...) may write to a temporary copy.
                df_programsInfo.loc[program, '{}_{}_F1'.format(
                    prefix,
                    classifier)] = fileRows[i_program_Max][i_program_F1]
                if classifier in classifiersWithSampleSplit:
                    df_programsInfo.loc[program, '{}_{}_SampleSplit'.format(
                        prefix, classifier)] = fileRows[i_program_First][
                            i_program_SampleSplit]

    if writeMetrics:
        # Writing program info: header first, then one row per program with
        # the program name in the first position
        programsInfoFileName = '{}/Programs/ProgramsInfo.csv'.format(
            os.getcwd())
        data = [programsHeader]
        for index, values in df_programsInfo.iterrows():
            data.append([index] + list(values.values))
        util.writeInCsvFile(programsInfoFileName, data, delimiter=',')

    return df_programsInfo