def _assertion_roster_mentions(cmt_data_list, roster_file, roster_list, team):
    """
    Check the arguments of roster_mentions() before that function uses them.

    Each argument is passed to the matching checker in the assertions module,
    in the order listed below. What a checker does with a bad value is defined
    in that module, not here.

    Parameter cmt_data_list: checked with assertions.assert_cmt_data_list.

    Parameter roster_file: checked with assertions.assert_roster_file_format.

    Parameter roster_list: checked with assertions.assert_str_list.

    Parameter team: checked with assertions.assert_team.
    """
    assertions.assert_cmt_data_list(cmt_data_list)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_str_list(roster_list)
    assertions.assert_team(team)
def _assertion_coach_mentions_glob(global_ID, agg_rost_ment_file, roster_file,
                                   mgmt_list, sent_dict, result, team):
    """
    Check the arguments of coach_mentions_glob() before that function uses
    them.

    Each argument is passed to the matching checker in the assertions module,
    in the order listed below. What a checker does with a bad value is defined
    in that module, not here.

    Parameter global_ID: checked with assertions.assert_global_ID.

    Parameter agg_rost_ment_file: checked, together with roster_file, with
    assertions.assert_agg_roster_ment_file_format.

    Parameter roster_file: checked with assertions.assert_roster_file_format.

    Parameter mgmt_list: checked with assertions.assert_str_list.

    Parameter sent_dict: accepted but not checked by this function.
    NOTE(review): confirm whether leaving sent_dict unchecked is intentional.

    Parameter result: checked with assertions.assert_result.

    Parameter team: checked with assertions.assert_team.
    """
    assertions.assert_global_ID(global_ID)
    # The aggregate-file check also receives roster_file, and it runs before
    # roster_file's own format check below.
    assertions.assert_agg_roster_ment_file_format(agg_rost_ment_file,
                                                  roster_file)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_str_list(mgmt_list)
    assertions.assert_result(result)
    assertions.assert_team(team)
def _assertion_roster_mentions_glob(global_ID, rost_ment_file, roster_file,
                                    roster_list, team):
    """
    Check the arguments of roster_mentions_glob() before that function uses
    them.

    Each argument is passed to the matching checker in the assertions module,
    in the order listed below. What a checker does with a bad value is defined
    in that module, not here.

    Parameter global_ID: checked with assertions.assert_global_ID.

    Parameter rost_ment_file: checked with
    assertions.assert_roster_ment_by_game_file_format.

    Parameter roster_file: checked with assertions.assert_roster_file_format.

    Parameter roster_list: checked with assertions.assert_str_list.

    Parameter team: checked with assertions.assert_team.
    """
    assertions.assert_global_ID(global_ID)
    assertions.assert_roster_ment_by_game_file_format(rost_ment_file)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_str_list(roster_list)
    assertions.assert_team(team)
def _assertion_comment_roster(raw_data_file, roster_file, roster_list,
                              cmt_data_list, team):
    """
    Check the arguments of comment_roster() before that function uses them.

    Each argument is passed to the matching checker in the assertions module,
    in the order listed below. What a checker does with a bad value is defined
    in that module, not here.

    Parameter raw_data_file: checked with
    assertions.assert_raw_data_file_format.

    Parameter roster_file: checked with assertions.assert_roster_file_format.

    Parameter roster_list: checked with assertions.assert_str_list.

    Parameter cmt_data_list: checked with assertions.assert_cmt_data_list.

    Parameter team: checked with assertions.assert_team.
    """
    assertions.assert_raw_data_file_format(raw_data_file)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_str_list(roster_list)
    assertions.assert_cmt_data_list(cmt_data_list)
    assertions.assert_team(team)
def find_roster_names(roster_file):
    """
    Returns a list of the player names found in the "Player" column of a
    roster, in row order.

    The column is read by position, so the result does not depend on how the
    DataFrame's index is labelled (for example after the roster has been
    filtered or sorted).

    Parameter roster_file: the roster that contains information about players.
    Precondition: must be a DataFrame object created by the pandas module. It
    must contain the column "Player". The roster is passed to
    assertions.assert_roster_file_format before any data is read; how a
    malformed roster is reported is defined in the assertions module.
    """
    assertions.assert_roster_file_format(roster_file)
    # tolist() walks the column positionally; the previous players[index]
    # lookup was label-based and failed on a non-default index.
    return roster_file["Player"].tolist()
def find_management(roster_file):
    """
    Given a roster file, find the coaches, GMs, owners, and presidents of the
    team. Return a list of these people's names, in row order.

    A row is selected when its "Pos" value is exactly one of "Coach", "GM",
    "Owner" or "President"; the name comes from the same row's "Player" value.
    Rows are paired by position, so the result does not depend on how the
    DataFrame's index is labelled.

    Parameter roster_file: the roster that contains player names and positions.
    Precondition: must be a DataFrame object created by the pandas module and
    contain the necessary headers (this function reads "Pos" and "Player").
    The roster is passed to assertions.assert_roster_file_format before any
    data is read.
    """
    assertions.assert_roster_file_format(roster_file)
    important_people = ("Coach", "GM", "Owner", "President")
    # zip pairs the two columns positionally; the previous position[index] /
    # player[index] lookups were label-based and failed on a non-default index.
    return [
        person
        for role, person in zip(roster_file["Pos"], roster_file["Player"])
        if role in important_people
    ]
# Example #7
# 0
def _assertion_calc_mgmt_stats(global_ID_list, mgmt_list, roster_file, team):
    """
    Check the arguments of calc_mgmt_stats() before that function uses them.

    Each argument is passed to the matching checker in the assertions module,
    in the order listed below. What a checker does with a bad value is defined
    in that module, not here.

    Parameter global_ID_list: checked with assertions.assert_int_list.

    Parameter mgmt_list: checked with assertions.assert_str_list.

    Parameter roster_file: checked with assertions.assert_roster_file_format.

    Parameter team: checked with assertions.assert_team.
    """
    assertions.assert_int_list(global_ID_list)
    assertions.assert_str_list(mgmt_list)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_team(team)
# Example #8
# 0
def extract_col_data(raw_data_file, roster_file, word_file_reader, team):
    """
    Returns a two dimensional list. Each inner list corresponds to a named entity,
    its category (person, place, nickname) with its associated global and local
    ID. All global_IDs and local_IDs must be integers. Comments can be of any
    category, but are cast into strings before any processing. The function
    also checks if the comments have nicknames as named entities.

    Assume that for every global_ID and local_ID, there is an associated comment
    in that row. This means that the number of elements in each of the three
    columns is the same. Rows whose comment is null are skipped. Only the first
    row seen for a given (global_ID, local_ID) pair is considered; later rows
    with the same pair are ignored, even if that first row was skipped for
    having a null comment.

    The order of the three columns in raw_data_file does not matter, because
    they are looked up by header. Rows are read by position, so the labels of
    the DataFrame's index do not matter either.

    Since regexes are used to find full names, nicknames, first/last names, words
    that contain these names as substrings are removed from the comments before
    extraction. Each such word is removed both as written and in its
    string.capwords() form. For shortened first/last names, a different method
    other than regex is used so substrings are not relevant for those.

    Parameter raw_data_file: the reader object with the csvfile that you want
    to extract the data from.
    Precondition: must be a DataFrame object created from the pandas module with
    headers global_ID, local_ID and comment. The terms in the global and local ID
    columns must be integers.

    Parameter roster_file: the reader object containing names and nicknames to
    check for in the comments.
    Precondition: must be a DataFrame object created from the pandas module and
    contain the correct headers (this function reads "Player", "First", "Last",
    "First Short", "Last Short" and "Nicknames").

    Parameter word_file_reader: the table of words to strip from comments before
    extraction; the column named after team is used.
    Precondition: must pass assertions.assert_word_removal_file_format for team.

    Parameter team: the team whose column of word_file_reader is used.
    Precondition: must pass assertions.assert_team.

    Raises Exception("Failed to create 2D list of named entities.") if word
    removal or entity extraction fails for a comment; the underlying error is
    attached as the exception's cause.
    """
    assertions.assert_raw_data_file_format(raw_data_file)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_team(team)
    assertions.assert_word_removal_file_format(word_file_reader, team)
    glob_ID = raw_data_file["global_ID"]
    loc_ID = raw_data_file["local_ID"]
    comm = raw_data_file["comment"]
    cmt_data_list = []
    # For column "Player", "Nicknames", "First", and "Last", include potential
    # substrings in this list that could be mistaken for names.
    stop_words = word_file_reader[team].tolist()
    # Every comment is unique in its glob/loc ID. Remember the ID pairs already
    # visited so a repeated pair is not processed twice; a set keeps the
    # membership test constant-time.
    seen_ids = set()
    # Build the roster-derived lookups once, outside the row loop.
    short_f = _create_list(roster_file, "First Short")
    short_l = _create_list(roster_file, "Last Short")
    name_str = _make_name_str(roster_file["Player"], roster_file["First"],
                              roster_file["Last"])
    nickname_str = _make_nickname_str(_create_list(roster_file, "Nicknames"))
    for row in range(raw_data_file.shape[0]):
        # .iloc is positional, so this works whatever the index labels are.
        glob_val = glob_ID.iloc[row]
        loc_val = loc_ID.iloc[row]
        comment = comm.iloc[row]
        id_pair = (glob_val, loc_val)
        if id_pair not in seen_ids and not pandas.isnull(comment):
            # Cast first so that non-string comments can go through the
            # str.replace() calls below, as the contract above promises.
            new_comm = str(comment)
            try:
                for word in stop_words:
                    new_comm = new_comm.replace(word, "")
                    new_comm = new_comm.replace(string.capwords(word), "")
                _extract_entities(cmt_data_list, glob_val, loc_val,
                                  new_comm, name_str, nickname_str, short_f,
                                  short_l)
            except Exception as err:
                # Keep the original message but chain the real cause so the
                # failure can be diagnosed.
                raise Exception(
                    "Failed to create 2D list of named entities.") from err
        # Recorded for every row, including rows skipped above.
        seen_ids.add(id_pair)
    return cmt_data_list