def check_range(value, location, minValue):
    """Report whether ``value`` falls outside the permitted integer range.

    The lower bound is ``minValue``; the upper bound is ``maxValue``, which is
    not a parameter and is looked up in the enclosing module scope at call
    time.

    Args:
        value: The number to check.
        location: Description of where the value came from; only used in the
            logged error message.
        minValue: Smallest accepted value (inclusive).

    Returns:
        True when the value is out of range (an error is logged through
        ``prints.logerr``), otherwise False.
    """
    # Same comparison form as before so the upper bound is only evaluated
    # when the lower-bound test has not already failed.
    outOfRange = (value < minValue) or (value > maxValue)
    if not outOfRange:
        return False

    message = "Value {0} in {1} is an integer out of the required range."
    prints.logerr(message.format(value, location))
    return True
 def isinstance_float(value, location, minValue):
     """Convert a whole-number float to int and range-check it.

     Args:
         value: Candidate value; only ``float`` instances are processed.
         location: Description of where the value came from, used in logged
             error messages.
         minValue: Lower bound forwarded to ``check_range``.

     Returns:
         A 3-tuple ``(wasFloat, value, errorFlag)``:
           * not a float        -> ``(False, value, False)``
           * non-integral float -> ``(True, value, True)`` after logging
           * integral float     -> ``(True, int(value), check_range(...))``
     """
     # Anything that is not a float is handed back untouched.
     if not isinstance(value, float):
         return False, value, False

     # Floats with a fractional part cannot stand in for an int.
     if not value.is_integer():
         prints.logerr("Value {0} in {1} is not a required int.".format(
             value, location))
         return True, value, True

     asInt = int(value)
     # The range check receives the original float, as before.
     return True, asInt, check_range(value, location, minValue)
 def checkRange(dataSeries, rmin, rmax):
     """Log every element of ``dataSeries`` lying outside ``[rmin, rmax]``.

     Args:
         dataSeries: Iterable of comparable values; its ``name`` attribute is
             read only when an out-of-range value has to be reported.
         rmin: Smallest accepted value (inclusive).
         rmax: Largest accepted value (inclusive).

     Returns:
         True if at least one element was out of range, otherwise False.
     """
     template = ('{0} outside of acceptable range for {1}. '
                 'Accepted values are between {2} - {3}')
     foundBad = False
     for item in dataSeries:
         tooSmall = item < rmin
         if not (tooSmall or item > rmax):
             continue
         prints.logerr(template.format(item, dataSeries.name, rmin, rmax))
         foundBad = True
     return foundBad
 def isinstance_obj(value, location, minValue):
     """Validate a loosely-typed value by first coercing it to ``float``.

     Args:
         value: The raw value to validate.
         location: Description of where the value came from, used in logged
             error messages.
         minValue: Lower bound forwarded to ``isinstance_float``.

     Returns:
         ``(True, 0, True)`` when ``float(value)`` raises ``ValueError`` (an
         error is logged); otherwise whatever ``isinstance_float`` returns for
         the converted float.
     """
     try:
         asFloat = float(value)
     except ValueError:
         prints.logerr("{0} in {1} is not an integer.".format(
             value, location))
         return True, 0, True

     # Integer-ness and range checks are delegated to the float validator.
     wasFloat, checkedValue, errorFlag = isinstance_float(
         asFloat, location, minValue)
     return wasFloat, checkedValue, errorFlag
 def checkDup(dataSeries):
     """Log every repeated value in ``dataSeries``.

     The first occurrence of a value is not considered a duplicate; each later
     occurrence is logged once through ``prints.logerr``.

     Args:
         dataSeries: pandas Series to inspect; its ``name`` is used in the
             logged messages.

     Returns:
         True if at least one duplicate was found, otherwise False.
     """
     # Build the duplicate selection once instead of recomputing the mask and
     # the filtered Series for every reported duplicate.
     duplicates = dataSeries[dataSeries.duplicated()]
     for dupValue in duplicates:
         prints.logerr("Duplicate {0} found '{1}'".format(
             dataSeries.name, dupValue))
     return not duplicates.empty
 def checkExists(dataSeries):
     """Log every missing (NaN) entry among the first ``numStudents`` rows.

     ``numStudents`` is read from module scope; rows are addressed as
     ``dataSeries[0] .. dataSeries[numStudents - 1]``.

     Args:
         dataSeries: Series to inspect; its ``name`` is used in the logged
             messages.

     Returns:
         True if at least one empty element was found, otherwise False.
     """
     missingFound = False
     for row in range(numStudents):
         # Only an explicit True from pd.isna counts as an empty element.
         if pd.isna(dataSeries[row]) is not True:
             continue
         prints.logerr(
             "Empty element found in {0} column, row {1}".format(
                 dataSeries.name, row))
         missingFound = True
     return missingFound
 def checkFloat(dataSeries):
     """Check that ``dataSeries`` has a numeric dtype.

     When the dtype is not numeric, the first ``numStudents`` rows (module
     scope) are scanned and every element for which ``float()`` raises
     ``ValueError`` is logged.

     Args:
         dataSeries: Series to inspect; its ``name`` is used in the logged
             messages.

     Returns:
         False when the Series dtype is numeric; True otherwise, whether or
         not any individual element was logged.
     """
     if is_numeric_dtype(dataSeries) is not False:
         return False

     # Non-numeric dtype: point out the individual cells that cannot be parsed.
     for row in range(numStudents):
         cell = dataSeries[row]
         try:
             float(cell)
         except ValueError:
             prints.logerr(
                 "Unexpected data found in {0}, row {1} = '{2}'".format(
                     dataSeries.name, row, cell))
     return True
 def checkBool(dataSeries):
     """Check that ``dataSeries`` has a boolean dtype.

     When the dtype is not boolean, the first ``numStudents`` rows (module
     scope) are scanned and each offending element is logged. Accepted
     elements are Python bools and the strings ``'true'`` / ``'false'`` in any
     letter case. Missing values (NaN/None) are skipped by this check.

     Args:
         dataSeries: Series to inspect; its ``name`` is used in the logged
             messages.

     Returns:
         False when the Series dtype is boolean; True otherwise, whether or
         not any individual element was logged.
     """
     if is_bool_dtype(dataSeries) is not False:
         return False

     for student in range(numStudents):
         val = dataSeries[student]
         # Real booleans are fine. Previously only False was exempted, so a
         # True stored in an object column crashed on val.lower().
         if isinstance(val, bool):
             continue
         if isinstance(val, str):
             if val.lower() in ('true', 'false'):
                 continue
         elif pd.isna(val) is True:
             # Missing cells have no .lower(); skip them instead of raising
             # AttributeError.
             continue
         prints.logerr(
             "Unexpected data found in {0}, row {1} = '{2}'".format(
                 dataSeries.name, student, val))
     return True
 def checkInt(dataSeries):
     """Check that ``dataSeries`` holds integer data.

     Three cases, decided by the Series dtype:
       * non-numeric dtype: the error flag is set, and every non-missing
         element among the first ``numStudents`` rows (module scope) that is
         neither an int nor an all-digit string is logged;
       * numeric but not integer dtype (e.g. float because of NaN cells):
         every non-missing element that is not a whole number is logged and
         sets the error flag;
       * integer dtype: nothing to check.

     Args:
         dataSeries: Series to inspect; its ``name`` is used in the logged
             messages.

     Returns:
         True if the Series is not acceptable as integer data, else False.
     """
     errInt = False
     if is_numeric_dtype(dataSeries) is False:  # Look at series label
         errInt = True
         for student in range(numStudents):
             val = dataSeries[student]  # Look at element
             # Test for missing values first: NaN is a float and has no
             # .isnumeric(), which previously raised AttributeError.
             if pd.isna(val) is True:
                 continue
             if isinstance(val, str) and val.isnumeric():
                 continue
             if isinstance(val, int) and not isinstance(val, bool):
                 continue
             prints.logerr(
                 "Unexpected data found in {0}, row {1} = '{2}'".format(
                     dataSeries.name, student, val))
     elif is_integer_dtype(dataSeries) is False:
         for student in range(numStudents):
             val = dataSeries[student]
             if pd.isna(val) is False and not val.is_integer():
                 prints.logerr(
                     "Unexpected data found in {0}, row {1} = '{2}'".format(
                         dataSeries.name, student, val))
                 # A logged non-integer value must also be reflected in the
                 # returned flag; previously it was logged but ignored.
                 errInt = True
     return errInt
Exemple #10
0
def csvFileCheck(csvFileName):
    """Locate and load a csv file, retrying with a ``.csv`` suffix.

    Args:
        csvFileName: Path to the csv file, with or without the ``.csv``
            extension.

    Returns:
        ``(dataFrame, resolvedFileName)`` on success. On failure an error is
        logged, the module-level ``errFlag`` is set to True and ``(0, 0)`` is
        returned.
    """
    global errFlag

    if not os.path.exists(csvFileName):
        # Original name not found: try again with the extension appended.
        withExtension = csvFileName + '.csv'
        if os.path.exists(withExtension):
            csvFileName = withExtension
        else:
            prints.logerr("{0} csv file can not be found.".format(csvFileName))
            errFlag = True
            return 0, 0

    try:
        loaded = pd.read_csv(csvFileName)
    except ValueError:
        prints.logerr("{0} is not a valid csv file.".format(csvFileName))
        errFlag = True
        return 0, 0
    else:
        return loaded, csvFileName
 def match1D(dataSeries, parentData):
     """Replace each element of ``dataSeries`` by its position in ``parentData``.

     ``dataSeries`` is checked against ``parentData`` for the first
     ``numStudents`` rows (module scope). Missing elements are skipped. A
     matching element is overwritten in place with the index of the first
     equal entry of ``parentData``; an element without a match is logged.

     Args:
         dataSeries: Series to translate (mutated in place); its ``name`` is
             used in the logged messages.
         parentData: Indexable collection of reference values.

     Returns:
         True if at least one element had no match, otherwise False.
     """
     errMatch = False
     for row in range(numStudents):
         # Skip empty elements.
         if pd.isna(dataSeries[row]) is not False:
             continue

         cell = dataSeries[row]
         for parentIdx in range(len(parentData)):
             if (cell == parentData[parentIdx]):
                 # replace element with parentData index
                 dataSeries.at[row] = parentIdx
                 break
         else:
             # Loop finished without a break: nothing in parentData matched.
             prints.logerr(
                 "No match found in {0} column, row {1} = {2:n}".format(
                     dataSeries.name, row, cell))
             errMatch = True
     return errMatch
    def matchProject2D(dataFrame):
        """Translate project ids in ``dataFrame`` into project indexes.

        Every non-missing cell is compared against the module-level
        ``projectIDs``; on a match the cell is overwritten in place with the
        position of that id. Cells of non-numeric columns are first cast to
        ``int`` (pandas stores a column as objects when it meets unexpected
        data). Cast failures and ids without a match are logged.

        Args:
            dataFrame: DataFrame of project ids (mutated in place).

        Returns:
            True if any cell could not be cast or matched, otherwise False.
        """
        errProj = False

        for col in dataFrame.columns:
            # Typecasting is only needed when the column is not numeric.
            needsCast = is_numeric_dtype(dataFrame[col]) is False
            for row in dataFrame.index:
                if pd.isna(dataFrame.at[row, col]) is not False:
                    continue  # empty element

                if needsCast:
                    try:
                        dataFrame.at[row, col] = int(dataFrame.at[row, col])
                    except (ValueError, TypeError, OverflowError):
                        # Only conversion failures are reported as bad data;
                        # a bare except would also hide e.g. KeyboardInterrupt.
                        prints.logerr(
                            "Unexpected data found in {0}, row {1} = '{2}'"
                            .format(col, row, dataFrame.at[row, col]))
                        errProj = True

                # Initialise before the loop so an empty projectIDs reports
                # "no match" instead of raising NameError or reusing the
                # result of the previous cell.
                sChoiceMatch = False
                for i in range(len(projectIDs)):
                    if (dataFrame.at[row, col] == projectIDs[i]):
                        sChoiceMatch = True
                        # replace project id with project index
                        dataFrame.at[row, col] = i
                        break
                if sChoiceMatch is False:
                    prints.logerr(
                        "No matching project id found for {0} = '{1}'".
                        format(col, dataFrame.at[row, col]))
                    errProj = True
        return errProj
def projectsHandler(projectsFileData):
    """Validate the projects csv data and publish it as module globals.

    Sets the module-level ``projectIDs``, ``minTeamSize`` and ``maxTeamSize``
    (arrays indexed by project) from the values returned by ``int_checker``.
    Blank team sizes are filled from the module-level ``defaultMinTeamSize`` /
    ``defaultMaxTeamSize`` before validation.

    Args:
        projectsFileData: DataFrame loaded from the projects csv file; the
            ``minTeamSize`` and ``maxTeamSize`` columns are modified in place.

    Returns:
        True if any validation error was detected, otherwise False. A gap in
        the projectID sequence only produces a warning.
    """
    # arrays indexed by project
    global minTeamSize
    global maxTeamSize
    global projectIDs

    projectsErrFlag = False
    # verify required project csv headers are present and not duplicated
    requiredColumns = ['projectID', 'minTeamSize', 'maxTeamSize']
    for col in requiredColumns:
        if col not in projectsFileData.columns:
            # NOTE(review): prints.err presumably aborts the program (per its
            # message); that is not visible from here.
            prints.err(
                "Required {0} column not found in the projects csv file. Terminating Program."
                .format(col))
        else:
            findDuplicateCols(projectsFileData, col, 'Projects CSV file')

    # verify that all values in program csv file are integers
    projectIDs, intErr = int_checker('projectID', None, projectsFileData, 1)
    projectsErrFlag = check_error_flag(intErr, projectsErrFlag)

    # verify that there are no duplicate project IDs in the projectID column
    # (the duplicated rows are selected once and reused for reporting)
    duplicateRows = projectsFileData[projectsFileData.projectID.duplicated()]
    if not duplicateRows.empty:
        projectsErrFlag = True
        for position in range(len(duplicateRows)):
            prints.logerr("Duplicate projectID found: {0}".format(
                duplicateRows.iloc[position]))

    # if values for team sizes are blank, enter size from settings csv and then verify all values are integers
    projectsFileData['minTeamSize'] = projectsFileData['minTeamSize'].fillna(
        defaultMinTeamSize)
    minTeamSize, intErr = int_checker('minTeamSize', None, projectsFileData, 1)
    projectsErrFlag = check_error_flag(intErr, projectsErrFlag)
    projectsFileData['maxTeamSize'] = projectsFileData['maxTeamSize'].fillna(
        defaultMaxTeamSize)
    # The result must land in intErr: it was previously stored under a
    # different name, so the stale minTeamSize result was checked instead and
    # maxTeamSize errors went unnoticed.
    maxTeamSize, intErr = int_checker('maxTeamSize', None,
                                      projectsFileData, 1)
    projectsErrFlag = check_error_flag(intErr, projectsErrFlag)

    # verify minTeamSize is not greater than maxTeamSize
    # zip() used to iterate in parallel over multiple iterables
    for minSize, maxSize, pid in zip(minTeamSize, maxTeamSize, projectIDs):
        if minSize > maxSize:
            projectsErrFlag = True
            prints.logerr(
                "minTeamSize is greater than maxTeamSize for projectID {0}.".
                format(pid))

    # warn user if gap found in projectID sequence, assuming projectIDs start at projectID '1'
    # arithmetic series = (n(firstNum + lastNum)) / 2, where n is # of terms in sequence,
    # then subtract real sum of projectIDs
    try:
        projectIDGap = projectIDs[-1] * (projectIDs[0] +
                                         projectIDs[-1]) / 2 - sum(projectIDs)
    except (ValueError, TypeError, IndexError):
        # Empty or non-numeric projectIDs: skip the gap warning. The cause of
        # the error would have already been identified above.
        projectIDGap = 0
    if projectIDGap != 0:
        prints.warn(
            "gap found in projectID sequence in the projects csv file.")

    return projectsErrFlag
def settingsHandler(settingsFileData):
    """Validate the settings csv data and publish it as module globals.

    Reads the rows named in ``requiredRows`` from ``settingsFileData`` and
    assigns the module-level scoring weights, limits, ``effort`` and
    ``lowGPAThreshold``. Integer settings are validated through
    ``int_checker``. ``effort`` falls back to the module-level
    ``defaultEffort`` when it is missing, not an int, or outside
    ``minEffort`` .. ``maxEffort``. ``lowGPAThreshold`` must be a float within
    ``minGPAThreshold`` .. ``maxGPAThreshold``.

    Args:
        settingsFileData: DataFrame loaded from the settings csv file, with
            the columns ``name``, ``min``, ``max`` and ``points``.

    Returns:
        True if any validation error was detected, otherwise False. Problems
        with ``effort`` only produce warnings.
    """
    global weightMaxLowGPAStudents
    global weightMaxESLStudents
    global weightMaxTeamSize
    global weightMinTeamSize
    global weightStudentPriority
    global weightStudentChoice1
    global weightAvoid
    global effort
    global maxLowGPAStudents
    global maxESLStudents
    global lowGPAThreshold
    global defaultMaxTeamSize
    global defaultMinTeamSize

    settingsErrFlag = False

    # verify required settings csv headers are present and not duplicated
    requiredColumns = ['name', 'min', 'max', 'points']
    for col in requiredColumns:
        if col not in settingsFileData.columns:
            # NOTE(review): prints.err presumably aborts the program (per its
            # message); that is not visible from here.
            prints.err(
                "Required {0} column header not found in the settings csv. Terminating Program."
                .format(col))
        else:
            findDuplicateCols(settingsFileData, col, 'Settings CSV file')

    # verify required settings csv rows are present and not duplicated
    requiredRows = [
        'teamSize', 'lowGPAThreshold', 'maxLowGPAStudents', 'maxESLStudents',
        'weightMaxLowGPAStudents', 'weightMaxESLStudents', 'weightMinTeamSize',
        'weightMaxTeamSize', 'weightStudentPriority', 'weightStudentChoice1',
        'weightAvoid', 'effort'
    ]
    for row in requiredRows:
        if row not in settingsFileData['name'].values:
            prints.err(
                "Required {0} row not found in the settings csv file. Terminating Program."
                .format(row))
        if len(settingsFileData[settingsFileData['name'] == row]) > 1:
            prints.err(
                "Required {0} row is duplicated in the settings csv file. Terminating Program."
                .format(row))

    # Index the settings by name once; every lookup below reuses this frame
    # instead of rebuilding the index for each value.
    settings = settingsFileData.set_index('name')

    # verify required fields in 'points' column contain integers
    # if they are, assign value to global variable for scoring function to use
    weightMaxLowGPAStudents, intErr = int_checker(
        None, 'weightMaxLowGPAStudents',
        settings.at['weightMaxLowGPAStudents', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    weightMaxESLStudents, intErr = int_checker(
        None, 'weightMaxESLStudents',
        settings.at['weightMaxESLStudents', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    weightMinTeamSize, intErr = int_checker(
        None, 'weightMinTeamSize',
        settings.at['weightMinTeamSize', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    weightMaxTeamSize, intErr = int_checker(
        None, 'weightMaxTeamSize',
        settings.at['weightMaxTeamSize', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    weightStudentPriority, intErr = int_checker(
        None, 'weightStudentPriority',
        settings.at['weightStudentPriority', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    weightStudentChoice1, intErr = int_checker(
        None, 'weightStudentChoice1',
        settings.at['weightStudentChoice1', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    weightAvoid, intErr = int_checker(
        None, 'weightAvoid', settings.at['weightAvoid', 'points'], 0)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)

    # verify required values in 'max' and 'min' column are integers
    # if they are, assign value to global variable for scoring function to use
    defaultMaxTeamSize, intErr = int_checker(
        None, 'teamSize', settings.at['teamSize', 'max'], 1)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    defaultMinTeamSize, intErr = int_checker(
        None, 'teamSize', settings.at['teamSize', 'min'], 1)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    maxLowGPAStudents, intErr = int_checker(
        None, 'maxLowGPAStudents', settings.at['maxLowGPAStudents', 'max'], 1)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)
    maxESLStudents, intErr = int_checker(
        None, 'maxESLStudents', settings.at['maxESLStudents', 'max'], 1)
    settingsErrFlag = check_error_flag(intErr, settingsErrFlag)

    # if effort value is provided in the csv file, verify the value is an integer within the required range
    # if it is not, use default value
    try:
        effort = int(settings.at['effort', 'max'])
    except (ValueError, TypeError):
        prints.warn(
            "valid 'effort' value not found in the settings csv. Running with default value."
        )
        # Actually apply the default announced by the warning; previously
        # effort kept whatever value it had (or was undefined) at this point.
        effort = defaultEffort
    if effort < minEffort or effort > maxEffort:
        effort = defaultEffort
        prints.warn(
            "'effort' in the settings csv is not an int between 1 and 100. Running with default value of 20."
        )

    # verify that provided lowGPAThreshold is not empty and that it's a float within range
    # If it is, assign value to global variable for scoring function to use
    try:
        lowGPAThreshold = float(settings.at['lowGPAThreshold', 'min'])
    except ValueError:
        prints.logerr("The lowGPAThreshold 'min' value is not a float.")
        settingsErrFlag = True
        lowGPAThreshold = 0  # temp value to pass statement below to allow program to continue checking for errors
    if (lowGPAThreshold < minGPAThreshold) or (
            lowGPAThreshold > maxGPAThreshold) or (pd.isna(lowGPAThreshold)):
        prints.logerr(
            "lowGPAThreshold 'min' setting requires a 0.00 - 4.00 value.")
        settingsErrFlag = True

    return settingsErrFlag