Ejemplo n.º 1
0
def check_medal_column_rules(row_to_process, corrupt):
    """Flag a row whose medal does not agree with its place.

    Parameters:
        row_to_process (str): Row whose medal value has to be checked.
        corrupt (boolean): Corruption status of the row so far.

    Returns:
        boolean: True if the medal is "Gold", "Silver" or "Bronze" while the
            place is not "1", "2" or "3" respectively; otherwise the incoming
            corrupt value unchanged.

    Preconditions:
        get_column is in scope, and MEDAL_POSITION and PLACE_POSITION are
        defined as module-level constants.
    """
    place_required_by_medal = {"Gold": "1", "Silver": "2", "Bronze": "3"}

    medal = get_column(row_to_process, MEDAL_POSITION)
    place = get_column(row_to_process, PLACE_POSITION)

    # Any other medal text (including empty) is not judged by this rule.
    required_place = place_required_by_medal.get(medal)
    if required_place is not None and place != required_place:
        return True
    return corrupt
Ejemplo n.º 2
0
def correct_column_format(row_to_process):
    """Return the row with its country code and medal columns normalised.

    Parameters:
        row_to_process (str): Row whose country code and medal are corrected.

    Returns:
        str: Row with the country code upper-cased (only when it is purely
            alphabetic) and the medal replaced by whatever
            correct_medal_format returns for it.

    Preconditions:
        get_column, replace_column and correct_medal_format are in scope.
        COUNTRY_CODE_POSITION and MEDAL_POSITION are module-level constants.
    """
    raw_code = get_column(row_to_process, COUNTRY_CODE_POSITION)
    # A code containing anything other than letters is left exactly as found.
    fixed_code = raw_code.upper() if raw_code.isalpha() else raw_code

    fixed_medal = correct_medal_format(
        get_column(row_to_process, MEDAL_POSITION))

    with_code = replace_column(row_to_process, fixed_code,
                               COUNTRY_CODE_POSITION)
    return replace_column(with_code, fixed_medal, MEDAL_POSITION)
Ejemplo n.º 3
0
def fix_length(row):
    """Truncate the columns at indices 0, 1 and 2 to at most 30 characters.

    According to the original comments these hold the event name, the first
    name and the last name.

    Parameters:
        row (str): String of data with comma separators.

    Return:
        str: The row with the three columns written back, shortened where
            they were longer than 30 characters.

    Preconditions:
        row != None
    """
    limit = 30
    columns = (0, 1, 2)

    # Read every column first, then shorten, then write back in index order.
    values = [get_column(row, column) for column in columns]
    values = [
        truncate_string(value, limit) if len(value) > limit else value
        for value in values
    ]
    for column, value in zip(columns, values):
        row = replace_column(row, value, column)
    return row
Ejemplo n.º 4
0
def check_name(row):
    """Check whether the row's first/last name pair appears in the master file.

    The names at indices 1 and 2 of 'row' are compared against every line of
    "athlete_names.csv", whose columns 0 and 1 are truncated to 30 characters
    before the comparison.

    Parameters:
        row (str): String of data with comma separators.

    Return:
        bool: Return True if data is corrupt (no matching pair was found),
            otherwise return False.

    Preconditions:
        row != None
        "athlete_names.csv" is readable from the working directory.
    """
    first_name = get_column(row, 1)
    last_name = get_column(row, 2)

    with open("athlete_names.csv", "r") as names:
        for line in names:
            # Strip only a real newline: slicing off the last character would
            # eat a letter on a final line that has no trailing newline.
            line = line.rstrip("\n")
            first_master = truncate_string(get_column(line, 0), 30)
            last_master = truncate_string(get_column(line, 1), 30)
            if first_name == first_master and last_name == last_master:
                return False
    return True
Ejemplo n.º 5
0
def check_world_record_column_rules(row_to_process, corrupt):
    """Flag a row whose legal world record differs from its olympic record.

    Parameters:
        row_to_process (str): Row whose world record and olympic record
            values are checked.
        corrupt (boolean): Corruption status of the row so far.

    Returns:
        boolean: True if the world record passed check_number_format, fits
            within WORLD_RECORD_MAX_CHARACTER_LENGTH, is non-empty and is not
            equal to the olympic record; otherwise the incoming corrupt value.

    Preconditions:
        get_column and check_number_format are in scope.
        WORLD_RECORD_POSITION, OLYMPIC_RECORD_POSITION and
        WORLD_RECORD_MAX_CHARACTER_LENGTH are module-level constants.
    """
    world_record = get_column(row_to_process, WORLD_RECORD_POSITION)
    format_check = check_number_format(world_record, corrupt)
    olympic_record = get_column(row_to_process, OLYMPIC_RECORD_POSITION)

    # Explicit comparison with False is kept deliberately: the helper's
    # return type is not visible from here.
    if format_check == False:
        is_legal_value = (
            len(world_record) <= WORLD_RECORD_MAX_CHARACTER_LENGTH
            and world_record != ""
        )
        if is_legal_value and world_record != olympic_record:
            return True
    return corrupt
Ejemplo n.º 6
0
def check_records(row):
    """Validate the three record columns at indices 8, 9 and 10.

    The original comments name these olympic record, world record and track
    record. A row is corrupt when any of them is longer than 8 characters,
    when check_invalid_character reports a problem for any of them, or when
    the world record is non-empty and differs from the olympic record.

    Parameters:
        row (str): String containing the row to be checked.

    Return:
        bool: Return True if data is corrupt, otherwise return False.

    Preconditions:
        row != None
    """
    digits_and_dot = "1234567890."
    olympic_record = get_column(row, 8)
    world_record = get_column(row, 9)
    track_record = get_column(row, 10)

    if any(len(record) > 8
           for record in (olympic_record, world_record, track_record)):
        return True

    # The track record is additionally allowed a newline character.
    if (check_invalid_character(digits_and_dot, olympic_record)
            or check_invalid_character(digits_and_dot, world_record)
            or check_invalid_character("\n" + digits_and_dot, track_record)):
        return True

    return world_record != "" and olympic_record != world_record
Ejemplo n.º 7
0
def max_char_corrupt(row):
    """Report whether any length-limited column exceeds its maximum width.

    Limits by get_column index: 4 and 5 -> 3 characters, 6 and 8 -> 6
    characters, 7, 9, 10 and 11 -> 8 characters.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if at least one of the listed columns is too long,
            otherwise False.
    """
    column_limits = (
        (4, 3), (5, 3), (6, 6), (7, 8),
        (8, 6), (9, 8), (10, 8), (11, 8),
    )
    # A list (not a generator) so every column is fetched, as before.
    over_limit = [
        len(get_column(row, column)) > limit
        for column, limit in column_limits
    ]
    return any(over_limit)
Ejemplo n.º 8
0
def check_medal(row):
    """Validate the medal column (index 7) against the place column (index 4).

    The medal must be "Gold", "Silver", "Bronze" or empty, and medal and
    place must agree in both directions: "Gold" exactly when place is "1",
    "Silver" exactly when "2", "Bronze" exactly when "3".

    Parameters:
        row (str): String containing the row to be checked.

    Return:
        bool: Return True if data is corrupt, otherwise return False.

    Preconditions:
        row != None
    """
    medal = get_column(row, 7)
    place = get_column(row, 4)

    if medal not in ("Gold", "Silver", "Bronze", ""):
        return True

    medal_place_pairs = (("Gold", "1"), ("Silver", "2"), ("Bronze", "3"))
    # A pair is violated when exactly one side matches (exclusive or).
    return any(
        (medal == expected_medal) != (place == expected_place)
        for expected_medal, expected_place in medal_place_pairs
    )
Ejemplo n.º 9
0
def rule_illegal_character(row):
    """Report whether columns 1 to 4 contain a character outside the legal set.

    Legal characters are the space, ASCII letters, digits, hyphen and
    apostrophe.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if any of the four columns holds an illegal character,
            otherwise False.
    """
    legal_character = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-'"

    # Built as a list so that all four columns are always read.
    column_is_bad = [
        any(char not in legal_character for char in get_column(row, column))
        for column in (1, 2, 3, 4)
    ]
    return any(column_is_bad)
Ejemplo n.º 10
0
def fix_medal(row):
    """Normalise the casing of a recognised medal in column index 7.

    'gold', 'GOLD', 'gOlD', ... become "Gold"; likewise for "Silver" and
    "Bronze". Any other value is left exactly as it was. Earlier versions
    wrote back an upper-cased copy of unrecognised text, which silently
    altered data the docstring promised not to touch.

    Parameters:
        row (str): String of data with comma separators.

    Return:
        str: The row with a recognised medal re-cased, or the row itself if
            the medal column holds anything else.

    Preconditions:
        row != None
    """
    canonical_medals = {"GOLD": "Gold", "SILVER": "Silver", "BRONZE": "Bronze"}

    medal = get_column(row, 7)
    # Upper-case only for the lookup; the stored value is never upper-cased.
    fixed = canonical_medals.get(medal.upper())
    if fixed is None:
        return row
    return replace_column(row, fixed, 7)
Ejemplo n.º 11
0
def check_world_olympic_record(row):
    """Report whether a non-empty world record differs from the olympic record.

    The world record is read with get_column index 10 and the olympic record
    with index 9. The function always returns a bool; previously the return
    statement sat inside the `if`, so an empty world record yielded None.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if the world record column is occupied and does not equal
            the olympic record column, otherwise False.
    """
    world_record = get_column(row, 10)
    olympic_record = get_column(row, 9)
    # An empty world record means there is nothing to compare.
    return len(world_record) > 0 and world_record != olympic_record
Ejemplo n.º 12
0
def check_three_medal_rule(row_to_process, corrupt):
    """Flag a row that breaks the "places 1-3 get a medal" rule.

    The rule applies only when check_event_medal reports that the row's event
    has a medal: places "1", "2" and "3" must then have a non-empty medal and
    every other place must have an empty one.

    Parameters:
        row_to_process (str): Row on which the three medal rule is checked.
        corrupt (boolean): Corruption status of the row so far.

    Returns:
        boolean: True if the rule is broken, otherwise the incoming corrupt
            value unchanged.

    Preconditions:
        get_column and check_event_medal are in scope.
        EVENT_NAME_POSITION, PLACE_POSITION and MEDAL_POSITION are
        module-level constants.
    """
    event_name = get_column(row_to_process, EVENT_NAME_POSITION)
    place = get_column(row_to_process, PLACE_POSITION)
    medal = get_column(row_to_process, MEDAL_POSITION)

    if check_event_medal(event_name):
        on_podium = place in ("1", "2", "3")
        has_medal = medal != ""
        # Podium places need a medal; all other places must not have one.
        if on_podium != has_medal:
            return True
    return corrupt
Ejemplo n.º 13
0
def rule_numbers(row):
    """Report whether any numeric column holds a non-numeric character.

    Columns at get_column indices 6, 7, 9, 10 and 11 are each checked
    independently. Legal characters are the digits, '.', and the newline,
    which is tolerated so that a trailing line break on the last column does
    not count as illegal. Empty columns pass.

    The earlier version chained the columns with `elif`, so only the first
    non-empty column was ever validated.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if at least one checked column has an illegal character,
            otherwise False.
    """
    legal_number_format = "1234567890.\n"

    for column in (6, 7, 9, 10, 11):
        value = get_column(row, column)
        if any(char not in legal_number_format for char in value):
            return True
    return False
Ejemplo n.º 14
0
def check_place_score_time(row):
    """Cross-check the place, score and time columns (indices 4, 5 and 6).

    The check applies only when place is non-empty and
    check_invalid_character reports nothing for it against the digit set.
    Such a row is corrupt when check_score and check_time both flag it, when
    score and time are both empty, or when score and time are both non-empty
    and neither check flags the row.

    Parameters:
        row (str): String containing the row to be checked.

    Return:
        bool: Return True if data is corrupt, otherwise return False.

    Preconditions:
        row != None
    """
    place = get_column(row, 4)
    score = get_column(row, 5)
    time = get_column(row, 6)

    place_is_number = (
        not check_invalid_character("1234567890", place) and place != ""
    )
    if not place_is_number:
        return False

    # Both flagged, or both missing.
    if (check_score(row) and check_time(row)) or (score == "" and time == ""):
        return True

    # Both present and both accepted by their checks.
    return ((not check_score(row)) and score != ""
            and (not check_time(row)) and time != "")
Ejemplo n.º 15
0
def upper_case_country(row):
    """Upper-case the value stored at column index 4 (the country code).

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        str: Updated row with the column's text upper-cased.
    """
    country_column = 4
    upper_cased = get_column(row, country_column).upper()
    return replace_column(row, upper_cased, country_column)
Ejemplo n.º 16
0
def check_col_9_6(row):
    """Report whether a podium place lacks its matching medal.

    Place is read with get_column index 5 and medal with index 8; the medal
    comparison ignores letter case. Places other than "1", "2" and "3" are
    never flagged here.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if place is "1", "2" or "3" and the medal is not GOLD,
            SILVER or BRONZE respectively, otherwise False.
    """
    medal_for_place = {"1": "GOLD", "2": "SILVER", "3": "BRONZE"}

    medal = get_column(row, 8).upper()
    place = get_column(row, 5)
    return place in medal_for_place and medal != medal_for_place[place]
Ejemplo n.º 17
0
def check_column_format(row_to_process, corrupt):
    """Run the per-column format checks over column positions 0 to 10.

    Parameters:
        row_to_process (str): Row whose columns are checked.
        corrupt (boolean): Corruption status of the row so far.

    Returns:
        boolean: The corrupt value after it has been threaded through the
            format helpers, or True if the medal column holds anything other
            than "Gold", "Silver", "Bronze" or an empty string.

    Preconditions:
        get_column, check_name_format, check_country_code_format,
        check_place_format and check_number_format are in scope.
        All *_POSITION names used below are module-level constants.
    """
    name_columns = (EVENT_NAME_POSITION, ATHLETE_FIRST_NAME_POSITION,
                    ATHLETE_SURNAME_POSITION)
    number_columns = (SCORE_POSITION, TIME_POSITION, OLYMPIC_RECORD_POSITION,
                      WORLD_RECORD_POSITION)
    legal_medals = ("Gold", "Silver", "Bronze", "")

    for position in range(11):
        value = get_column(row_to_process, position)

        if position in name_columns:
            corrupt = check_name_format(value, corrupt)
        if position == COUNTRY_CODE_POSITION:
            corrupt = check_country_code_format(value, corrupt)
        if position == PLACE_POSITION:
            corrupt = check_place_format(value, corrupt)
        if position == MEDAL_POSITION and value not in legal_medals:
            corrupt = True
        if position in number_columns:
            corrupt = check_number_format(value, corrupt)
        if position == TRACK_RECORD_POSITION:
            # Drop the final character (the row's trailing "\n") first.
            corrupt = check_number_format(value[:-1], corrupt)

    return corrupt
Ejemplo n.º 18
0
def check_event_medal(event_name):
    """Report whether the event has a medal on any row of the raw data file.

    Scans "athlete_data.csv" and stops at the first row whose event name
    (get_column index 1) equals 'event_name' and whose medal (index 8) is
    non-empty. Stopping early avoids reading the rest of the file once the
    answer is known.

    Parameters:
        event_name (str): Event which is checked for having a medal.

    Returns:
        boolean: True if at least one matching row carries a medal value,
            else False.

    Preconditions:
        get_column is in scope.
        "athlete_data.csv" is readable from the working directory.
    """
    with open("athlete_data.csv", "r") as raw_data_file:
        for row in raw_data_file:
            if get_column(row, 1) == event_name and get_column(row, 8) != "":
                return True
    return False
Ejemplo n.º 19
0
def max_char_30(row):
    """Truncate the columns at indices 0 to 3 to 30 characters each.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        str: Updated row in which each of the four columns has been passed
            through truncate_string with a limit of 30.
    """
    for column in (0, 1, 2, 3):
        shortened = truncate_string(get_column(row, column), 30)
        row = replace_column(row, shortened, column)
    return row
Ejemplo n.º 20
0
def check_col_6_7_8(row):
    """Check that a whole-number place has exactly one of two columns filled.

    When the value at get_column index 5 consists only of digits, exactly one
    of the columns at indices 6 and 7 must be non-empty. The function always
    returns a bool; previously the return statement sat inside the `if`, so a
    non-digit value at index 5 yielded None.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if index 5 is a whole number and columns 6 and 7 are both
            filled or both empty, otherwise False.
    """
    if not get_column(row, 5).isdigit():
        return False

    first_filled = len(get_column(row, 6)) > 0
    second_filled = len(get_column(row, 7)) > 0
    # Corrupt when both are filled or both are empty.
    return first_filled == second_filled
Ejemplo n.º 21
0
def check_time(row):
    """Validate the time column (index 6) of a row.

    The column is corrupt when it is longer than 8 characters, or when it is
    non-empty and check_invalid_character flags it against the digits and '.'.

    Parameters:
        row (str): String containing the row to be checked.

    Return:
        bool: Return True if data is corrupt, otherwise return False.

    Preconditions:
        row != None
    """
    time = get_column(row, 6)
    has_bad_character = check_invalid_character("1234567890.", time)

    too_long = len(time) > 8
    return bool(too_long or (time != "" and has_bad_character))
Ejemplo n.º 22
0
def fix_countrycode(row):
    """Return the row with the country code column (index 3) upper-cased.

    Parameters:
        row (str): String of data with comma separators.

    Return:
        str: The modified row is returned.

    Preconditions:
        row != None
    """
    code_column = 3
    upper_cased = get_column(row, code_column).upper()
    return replace_column(row, upper_cased, code_column)
Ejemplo n.º 23
0
def check_column_character_length(row_to_process):
    """Report whether any column from position 3 to 10 exceeds its max width.

    Parameters:
        row_to_process (str): Row whose column values are checked for length.

    Returns:
        boolean: True if a column is longer than its *_MAX_CHARACTER_LENGTH
            constant, else False. The track record is measured without its
            final character.

    Preconditions:
        get_column is in scope.
        The *_POSITION and *_MAX_CHARACTER_LENGTH names used below are
        module-level constants.
    """
    width_limits = (
        (COUNTRY_CODE_POSITION, COUNTRY_CODE_MAX_CHARACTER_LENGTH),
        (PLACE_POSITION, PLACE_MAX_CHARACTER_LENGTH),
        (SCORE_POSITION, SCORE_MAX_CHARACTER_LENGTH),
        (TIME_POSITION, TIME_MAX_CHARACTER_LENGTH),
        (MEDAL_POSITION, MEDAL_MAX_CHARACTER_LENGTH),
        (OLYMPIC_RECORD_POSITION, OLYMPIC_RECORD_MAX_CHARACTER_LENGTH),
        (WORLD_RECORD_POSITION, WORLD_RECORD_MAX_CHARACTER_LENGTH),
    )

    corrupt = False
    for position in range(3, 11):
        value = get_column(row_to_process, position)
        for limited_position, limit in width_limits:
            if position == limited_position and len(value) > limit:
                corrupt = True
        # The last column still carries the row's trailing character.
        if (position == TRACK_RECORD_POSITION
                and len(value[:-1]) > TRACK_RECORD_MAX_CHARACTER_LENGTH):
            corrupt = True
    return corrupt
Ejemplo n.º 24
0
def check_countrycode(row):
    """Check whether the row's country code appears in the master file.

    The code at index 3 of 'row' is compared against every line of
    "country_codes.csv".

    Parameters:
        row (str): String containing the row to be checked.

    Return:
        bool: Return True if data is corrupt (code not found), otherwise
            return False.

    Preconditions:
        row != None
        "country_codes.csv" is readable from the working directory.
    """
    country_code = get_column(row, 3)
    with open("country_codes.csv", "r") as country_codes:
        for line in country_codes:
            # Strip only a real newline: slicing off the last character would
            # shorten the final code when the file has no trailing newline.
            if country_code == line.rstrip("\n"):
                return False
    return True
Ejemplo n.º 25
0
def fix_medal_letters(row):
    """Fix the letter casing of a valid medal name in column index 8.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        str: Row with the medal rewritten as "Gold", "Silver" or "Bronze"
            when it matches one of them ignoring case; otherwise the row
            unchanged.
    """
    proper_casing = {"GOLD": "Gold", "SILVER": "Silver", "BRONZE": "Bronze"}

    fixed_medal = proper_casing.get(get_column(row, 8).upper())
    if fixed_medal is not None:
        row = replace_column(row, fixed_medal, 8)
    return row
Ejemplo n.º 26
0
def rule_place(row):
    """Report whether the place column (index 5) holds an illegal value.

    Legal values are strings made only of digits, the codes "DNS", "DNF" and
    "PEN", and the empty string. The function always returns a bool;
    previously the return statement sat inside the `if`, so a numeric place
    yielded None.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if the place value is none of the legal values, otherwise
            False.
    """
    place = get_column(row, 5)
    return not (place.isdigit() or place in ("DNS", "DNF", "PEN", ""))
Ejemplo n.º 27
0
def check_missing_entry(row):
    """Report whether any of the columns at indices 0 to 4 is blank.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if at least one of the five columns is an empty string,
            otherwise False.
    """
    # A list (not a generator) so that all five columns are always read.
    blanks = [get_column(row, column) == "" for column in range(5)]
    return any(blanks)
Ejemplo n.º 28
0
def check_event_name(row):
    """Check whether the row's event name appears in the master file.

    The name at index 0 of 'row' is compared against every line of
    "event_names.csv", each truncated to 30 characters before comparison.

    Parameters:
        row (str): String of data with comma separators.

    Return:
        bool: Return True if data is corrupt (name not found), otherwise
            return False.

    Preconditions:
        row != None
        "event_names.csv" is readable from the working directory.
    """
    event_name = get_column(row, 0)
    with open("event_names.csv", "r") as event_names:
        for line in event_names:
            # Strip only a real newline: slicing off the last character would
            # shorten the final name when the file has no trailing newline.
            master_name = truncate_string(line.rstrip("\n"), 30)
            if event_name == master_name:
                return False
    return True
Ejemplo n.º 29
0
def check_place(row):
    """Validate the place column (index 4) of a row.

    The column is corrupt when it is longer than 3 characters, or when
    check_invalid_character flags it against the digit set and it is not one
    of "DNS", "DNF", "PEN" or the empty string.

    Parameters:
        row (str): String containing the row to be checked.

    Return:
        bool: Return True if data is corrupt, otherwise return False.

    Preconditions:
        row != None
    """
    place = get_column(row, 4)
    has_non_digit = check_invalid_character("1234567890", place)
    is_accepted_text = place in ("DNS", "DNF", "PEN", "")

    return bool(len(place) > 3 or (has_non_digit and not is_accepted_text))
Ejemplo n.º 30
0
def check_valid_medal(row):
    """Report whether the medal column (index 8) holds a non-legal value.

    The comparison ignores letter case; an empty medal is legal.

    Parameters:
        row (str): String of data with comma separators (CSV format).

    Return:
        bool: True if the medal is not GOLD, SILVER, BRONZE or empty,
            otherwise False.
    """
    medal = get_column(row, 8).upper()
    return medal not in ("GOLD", "SILVER", "BRONZE", "")