コード例 #1
0
    def test_roster_class_pulls_all_player_stats(self, *args, **kwargs):
        flexmock(utils) \
            .should_receive('_find_year_for_season') \
            .and_return('2018')
        roster = Roster('HOU')

        assert len(roster.players) == 4

        for player in roster.players:
            assert player.name in ['James Harden', 'Tarik Black',
                                   'Ryan Anderson', 'Trevor Ariza']
コード例 #2
0
ファイル: stats.py プロジェクト: perintyler/nba_prediction
def get_data(team0,team1):
    away_roster = Roster(team0).players
    home_roster = Roster(team1).players

    away_data = away_roster.pop().dataframe.iloc[[-2]]
    for player in away_roster:
        if(team0.abbreviation, isPlayerInjured(player.player_name)):
            continue
        if(player.dataframe.empty == False):
            try :
                away_data = pd.concat([away_data, player.dataframe.iloc[[-2]]])
            except:
                print("player invalid: " + player.player_name)
    home_data = home_roster.pop().dataframe.iloc[[-2]]
    for player in home_roster:
        if(team0.abbreviation, isPlayerInjured(player.player_name)):
            continue
        if(player.dataframe.empty == False):
            try:
                home_data = pd.concat([home_data, player.dataframe.iloc[[-2]]])
            except:
                print("player invalid: " + player.player_name)
    home_data = home_data[PLAYER_STATS].sort_values(by=['usage_percentage']).head(12)
    away_data = away_data[PLAYER_STATS].sort_values(by=['usage_percentage']).head(12)

    return away_data, home_data
コード例 #3
0
    def test_roster_class_with_slim_parameter(self, *args, **kwargs):
        flexmock(utils) \
            .should_receive('_find_year_for_season') \
            .and_return('2018')
        roster = Roster('HOU', slim=True)

        assert len(roster.players) == 4
        assert roster.players == {
            'hardeja01': 'James Harden',
            'blackta01': 'Tarik Black',
            'anderry01': 'Ryan Anderson',
            'arizatr01': 'Trevor Ariza'
        }
コード例 #4
0
def get_rosters(years):
    #years is a list of years to pull the rosters from
    hinkie_roster = {}
    #    with open("hinkie_roster.txt", "w") as f:
    for year in years:
        #Gets roster for a given year
        season = "20" + year + "-{}".format(str(int(year) + 1))
        for player in Roster(team="PHI", year="20" + year).players:
            #incrementing through the player info and formatting data
            #for wordcloud.generate_from_frequencies(hinkie_roster)
            try:
                games_played = player.dataframe.loc[season].games_played[0]
                if player.name not in hinkie_roster and games_played > 0:
                    hinkie_roster[player.name] = games_played
                elif games_played > 0:
                    hinkie_roster[player.name] += games_played
            except KeyError:
                continue
#        for name, gp in hinkie_roster.items():
#                f.write("{}: Games Played: {}\n".format(name, gp))
#    f.close()
    return hinkie_roster
コード例 #5
0
ファイル: get_data.py プロジェクト: snaka0100/NCAA-NBA
def scrape_info():
    print("Getting Data and storing in Mongo(This will take some time)")
    # Creating the list of player objects.
    # This is done by going through each team, then getting the list of player objects on each roster, and storing them in a list. This project collects data only from players on an active roster, to preserve accuracy, and because we realize that trends in the modern NBA can change rapidly that attributes like the mid-range shot is not as needed today as it was years ago.



    teams = ['ATL','BRK','BOS','CHO','CHI','CLE','DAL','DEN','DET','GSW',
            'HOU','IND','LAC','LAL','MEM','MIA','MIL','MIN','NOP','NYK',
            'OKC','ORL','PHI','PHO','POR','SAC','SAS','TOR','UTA','WAS']
    player_list = []

    for team in teams:
        teamname = Roster(team)
        for players in teamname.players:
            player_list.append(players)
    print("Got all player names")




    #Storing all stats into two dictonaries now, each dictionary containing the player name as the key, and the value being the pandas dataframe that the function Player.dataframe gives us. This dataframe is a compilation of a ton of different stats, including some advanced stats. 



    nba_player_info = {}
    ncaa_player_info = {}
    for nbaplayer in player_list:
        try:
            name = nbaplayer.name
            name = name.replace("'", "")
            name = name.replace(".", "")
            split_name = name.split()
            firstname = str(split_name[0]).lower()
            lastname = str(split_name[1]).lower()
            nameid = firstname + "-" + lastname + "-1"
            ncaa_player = Player(nameid)
            nba_player_info[nbaplayer.name] = nbaplayer.dataframe
            ncaa_player_info[nbaplayer.name] = ncaa_player.dataframe 
        except(TypeError):
            pass
    print("Stored Everything in two dictionaries")



    #This is an example of the columns from each player's dataframe, seen below. The documentation contains information about what each column represents. 
    #nba_player_info['Kevin Durant'].columns



    #Here is an example of how to get a stat from the information dictionary made earlier, this gives a career average of Kevin Durant's true shooting percentage.

    #float(nba_player_info['Kevin Durant']['true_shooting_percentage']['Career'])


    #This code below stores 2 Pandas Dataframes, one for NBA statistics and one for NCAA statistics. These statistics are from the dataframe object stored from earlier, and the purpose of this is to make is much easier to access stats than the cell above. Each stat is calculated as a career total, then averaged out later.
    #First we stored information to an NBA dataframe, and printed out names that are one's we have to omit, because there is not enough data on them to store. Often, these players have empty stats lists because they are technically on the roster, but haven't played much yet. 



    nba_required_stats_list = []
    nba_names_to_drop = []
    for key, value in nba_player_info.items():
        try:
            raw_height = nba_player_info[key]['height']['Career'][-1].split('-')
            career_height = (float(raw_height[0]) * 12 ) + float(raw_height[1])
            career_weight = float(nba_player_info[key]['weight']['Career'])
            career_points = float(nba_player_info[key]['points']['Career'])
            career_games = float(nba_player_info[key]['games_played']['Career'])
            career_assists = float(nba_player_info[key]['assists']['Career'])
            defensive_rebounds = float(nba_player_info[key]['defensive_rebounds']['Career'])
            offensive_rebounds = float(nba_player_info[key]['offensive_rebounds']['Career'])
            career_turnovers = float(nba_player_info[key]['turnovers']['Career'])
            career_blocks = float(nba_player_info[key]['blocks']['Career'])
            career_steals = float(nba_player_info[key]['steals']['Career'])
            career_free_throw_percentage = float(nba_player_info[key]['free_throw_percentage']['Career'])
            career_three_point_percentage = float(nba_player_info[key]['three_point_percentage']['Career'])
            career_PER = float(nba_player_info[key]['player_efficiency_rating']['Career'])
            career_win_shares = float(nba_player_info[key]['win_shares']['Career'])
            off_win_shares = float(nba_player_info[key]['offensive_win_shares']['Career'])
            def_win_shares = float(nba_player_info[key]['defensive_win_shares']['Career'])
            career_field_goal_percentage = float(nba_player_info[key]['field_goal_percentage']['Career'])
            career_usage_percentage = float(nba_player_info[key]['usage_percentage']['Career'])
            vorp = float(nba_player_info[key]['value_over_replacement_player']['Career'][-1])
            boxplusminus = float(nba_player_info[key]['box_plus_minus']['Career'])
            true_shooting_percentage = float(nba_player_info[key]['true_shooting_percentage']['Career'])
            player_dict =  {'Name': key,
                            'Height': career_height,
                            'Weight': career_weight,
                            'Career Points': career_points,
                            'Career Games': career_games,
                            'Career Assists': career_assists,
                            'Career Def Rebounds':defensive_rebounds,
                            'Career Off Rebounds':offensive_rebounds,
                            'Career Turnovers': career_turnovers,
                            'Career Blocks': career_blocks,
                            'Career Steals': career_steals,
                            'Career Free Throw Percentage': career_free_throw_percentage,
                            'Career Three Point Percentage': career_three_point_percentage,
                            'Career Field Goal Percentage': career_field_goal_percentage,
                            'Career PER': career_PER,
                            'Career Win Shares': career_win_shares,
                            'Offensive Win Shares': off_win_shares,
                            'Defensive Win Shares': def_win_shares,
                            'Career Usage Percentage': career_usage_percentage,
                            'VORP': vorp,
                            'Box Plus Minus': boxplusminus,
                            'True Shooting Per': true_shooting_percentage
                            }
            nba_required_stats_list.append(player_dict)
        except (KeyError, TypeError):
            nba_names_to_drop.append(key)
            #print(key)
    nba_stats_df = pd.DataFrame(nba_required_stats_list)




    #nba_stats_df




    #Now to do the same for NCAA, we have to jump through a couple more hoop because of the fact that not all players have data that is available. For example, Tyler Johnson, who is in the NBA, has no NCAA data on height. To fix this, we need to create a function, that return -999 if no information is available, or return the corresponding stat as a float. This is so the code is not cluttered with a ton of if statements.   



    def stat_checker(stat):
        if(stat is None):
            return -999
        else:
            return float(stat)



    #NO VORP OR PER
    ncaa_required_stats_list = []
    ncaa_names_to_drop = []
    for key, value in ncaa_player_info.items():
        try:
            raw_height = ncaa_player_info[key]['height']['Career'][-1]
            if(raw_height == None):
                career_height = 0
            else:
                raw_height = raw_height.split('-')
                career_height = (float(raw_height[0]) * 12 ) + float(raw_height[1])        
            career_weight = stat_checker(ncaa_player_info[key]['weight']['Career'][-1])
            career_points = stat_checker(ncaa_player_info[key]['points']['Career'][-1])
            career_games = stat_checker(ncaa_player_info[key]['games_played']['Career'][-1])
            career_assists = stat_checker(ncaa_player_info[key]['assists']['Career'][-1])
            defensive_rebounds = stat_checker(ncaa_player_info[key]['defensive_rebounds']['Career'][-1])
            offensive_rebounds = stat_checker(ncaa_player_info[key]['offensive_rebounds']['Career'][-1])
            career_turnovers = stat_checker(ncaa_player_info[key]['turnovers']['Career'][-1])
            career_blocks = stat_checker(ncaa_player_info[key]['blocks']['Career'][-1])
            career_steals = stat_checker(ncaa_player_info[key]['steals']['Career'][-1])
            career_free_throw_percentage = stat_checker(ncaa_player_info[key]['free_throw_percentage']['Career'][-1])
            career_three_point_percentage = stat_checker(ncaa_player_info[key]['three_point_percentage']['Career'][-1])
            career_win_shares = stat_checker(ncaa_player_info[key]['win_shares']['Career'][-1])
            off_win_shares = stat_checker(ncaa_player_info[key]['offensive_win_shares']['Career'][-1])
            def_win_shares = stat_checker(ncaa_player_info[key]['defensive_win_shares']['Career'][-1])
            career_field_goal_percentage = stat_checker(ncaa_player_info[key]['field_goal_percentage']['Career'][-1])
            career_usage_percentage = stat_checker(ncaa_player_info[key]['usage_percentage']['Career'][-1])
            boxplusminus = stat_checker(ncaa_player_info[key]['box_plus_minus']['Career'][-1])
            true_shooting_percentage = stat_checker(ncaa_player_info[key]['true_shooting_percentage']['Career'][-1])
            player_dict =  {'Name': key,
                            'Height': career_height,
                            'Weight': career_weight,
                            'Career Points': career_points,
                            'Career Games': career_games,
                            'Career Assists': career_assists,
                            'Career Def Rebounds':defensive_rebounds,
                            'Career Off Rebounds':offensive_rebounds,
                            'Career Turnovers': career_turnovers,
                            'Career Blocks': career_blocks,
                            'Career Steals': career_steals,
                            'Career Free Throw Percentage': career_free_throw_percentage,
                            'Career Three Point Percentage': career_three_point_percentage,
                            'Career Field Goal Percentage': career_field_goal_percentage,
                            'Career Win Shares': career_win_shares,
                            'Offensive Win Shares': off_win_shares,
                            'Defensive Win Shares': def_win_shares,
                            'Career Usage Percentage': career_usage_percentage,
                            'Box Plus Minus': boxplusminus,
                            'True Shooting Per': true_shooting_percentage
                            }
            ncaa_required_stats_list.append(player_dict)
        except KeyError as err:
            print(key, "Key Error: ", err)
        except TypeError as err:
            print(key, "Type Error: ", err)
        except AttributeError as err:
            print(key, "Attribute Error: ", err)
    ncaa_stats_df = pd.DataFrame(ncaa_required_stats_list)





    #Now we have to condense the NCAA dataframe in order to include players only from the NBA. Recall the NBA dataframe where some players didn't play enough minutes in the NBA in order to record data, so we were not able to include them. So we need to make sure the names in the NCAA line up in the NBA. This also ensures a plot of x and y are represented as actual players, making it more accurate to see a transition of stats from college to the league. 





    nbanames = list(nba_stats_df["Name"])
    ncaa_stats_df = ncaa_stats_df[ncaa_stats_df['Name'].isin(nbanames)]
    ncaa_stats_df = ncaa_stats_df.reset_index(drop = True)
    #ncaa_stats_df



    #Finally, we included averages for each stat in the dataframe in order to make data analysis in the next notebook a lot easier to do and making plotting more intuitive. Having averages for stats is more accurate than total stats, considering not all player have played the same amount of games. 



    nba_stats_df["PPG"] = ''
    nba_stats_df["APG"] = ''
    nba_stats_df["TPG"] = ''
    nba_stats_df["BPG"] = ''
    nba_stats_df["SPG"] = ''
    nba_stats_df["RPG"] = ''
    ncaa_stats_df["PPG"] = ''
    ncaa_stats_df["APG"] = ''
    ncaa_stats_df["TPG"] = ''
    ncaa_stats_df["BPG"] = ''
    ncaa_stats_df["SPG"] = ''
    ncaa_stats_df["RPG"] = ''

    for index, row in nba_stats_df.iterrows():
        player = nba_stats_df.iloc[[index]]
        nba_stats_df.loc[index, "PPG"] = float(player["Career Points"] / player["Career Games"])
        nba_stats_df.loc[index, "APG"] = float(player["Career Assists"] / player["Career Games"])
        nba_stats_df.loc[index, "TPG"] = float(player["Career Turnovers"] / player["Career Games"])
        nba_stats_df.loc[index, "BPG"] = float(player["Career Blocks"] / player["Career Games"])
        nba_stats_df.loc[index, "SPG"] = float(player["Career Steals"] / player["Career Games"])
        nba_stats_df.loc[index, "RPG"] = float(player["Career Off Rebounds"] / player["Career Games"]) + float(player["Career Def Rebounds"] / player["Career Games"])
    for index, row in ncaa_stats_df.iterrows():
        player = ncaa_stats_df.iloc[[index]]
        ncaa_stats_df.loc[index, "PPG"] = float(player["Career Points"] / player["Career Games"])
        ncaa_stats_df.loc[index, "APG"] = float(player["Career Assists"] / player["Career Games"])
        ncaa_stats_df.loc[index, "TPG"] = float(player["Career Turnovers"] / player["Career Games"])
        ncaa_stats_df.loc[index, "BPG"] = float(player["Career Blocks"] / player["Career Games"])
        ncaa_stats_df.loc[index, "SPG"] = float(player["Career Steals"] / player["Career Games"])
        ncaa_stats_df.loc[index, "RPG"] = float(player["Career Off Rebounds"] / player["Career Games"]) + float(player["Career Def Rebounds"] / player["Career Games"])



    #Finally, this saves both dataframes to csv file in order to prevent the need for running these cells again. This may take some time to run considering the request of ~800 player dataframes and objects, so loading these csv files are recommended.



    players_with_college = pd.read_csv("all_seasons.csv")

    college_players = players_with_college["player_name"].tolist()
    college_names = players_with_college["college"].tolist()

    colleges = dict(zip(college_players, college_names))



    player_list = []
    not_here = []
    for player in nba_stats_df["Name"].tolist():
        try:
            if(player == "Wesley Iwundu"):
                player = "Wes Iwundu"
            elif(player == "Frank Mason III"):
                player = "Frank Mason"
            elif(player == "Sviatoslav Mykhailiuk"):
                player = "Svi Mykhailiuk"
            player_list.append((player, colleges[player]))
        except:
            try:
                player = player.replace(".", "")
                player_list.append((player, colleges[player]))
            except:
                try:
                    jr_player = player + " Jr."
                    player_list.append((player,  colleges[jr_player]))
                except:
                    try:
                        II_player = player + " II"
                        player_list.append((player,  colleges[II_player]))
                    except:
                        try:
                            III_player = player + " III"
                            player_list.append((player,  colleges[III_player]))
                        except:
                            not_here.append(player)




    player_list.append(("Bol Bol", "Oregon"))



    player_college_dict = {}
    for item in player_list:
        player_college_dict[item[0]] = item[1]

    #converting dictionary into a dataframe
    player_college_df = pd.DataFrame.from_dict(player_college_dict,orient ='index')

    #reseting the index 
    player_college_df.reset_index(inplace=True)
    #changing the name of the column
    player_college_df.rename({'index': 'Name', 0: 'school'},axis =1, inplace = True)


    #remove any school thhat is label as None
    player_college_df = player_college_df[player_college_df.school != "None"]

    #we added the Universitty at the end of each school name 
    #so we can make it clear to google that we are searching for a university
    player_college_df["school"] = player_college_df["school"] + " University"


    #adding lat and lon column
    player_college_df["lat"] = ""
    player_college_df["lng"] = ""

    #player_college_df.head()



    gkey = "AIzaSyAQzjQQxNwPTSCT4Yv2IolnWjYbXZWrsNs"
    for index, row in player_college_df.iterrows():
        
        # name of the school
        college = row["school"]
        #adding the school and key to the target url
        target_url = f'https://maps.googleapis.com/maps/api/geocode/json?address={college}&key={gkey}'

        # make request
        response = requests.get(target_url)
        
        # convert to json
        college_lat_lng = response.json()

        player_college_df.loc[index, "lat"] = college_lat_lng["results"][0]["geometry"]["location"]["lat"]
        player_college_df.loc[index, "lng"] = college_lat_lng["results"][0]["geometry"]["location"]["lng"]




    # Visualize to confirm lat lng appear
    #player_college_df





    #removing the the word "University" from the school column
    player_college_df["school"] = player_college_df["school"].str.split(" University", n = 1, expand = True)


    #combine the location informain with the condensed stat table 
    #retreving the necessary file 
    ncaa_clean = pd.DataFrame(ncaa_stats_df[["Name","Height","Weight", "PPG","APG","RPG", "SPG", "BPG","TPG", "Career Field Goal Percentage","Career Three Point Percentage"]])
    nba_clean = pd.DataFrame(nba_stats_df[["Name","Height","Weight", "PPG","APG","RPG", "SPG", "BPG","TPG", "Career Field Goal Percentage","Career Three Point Percentage"]])
    #renaming FGP and 3PG column
    ncaa_clean.rename({"Career Field Goal Percentage": "FGP", "Career Three Point Percentage": "3PG"}, axis=1,inplace = True)
    nba_clean.rename({"Career Field Goal Percentage": "FGP", "Career Three Point Percentage": "3PG"}, axis=1,inplace = True)

    #merging the nba_clean and player_college to add location information
    nba_location = pd.merge(player_college_df, nba_clean, on = "Name", how = "inner")



    #some NCAA entries are negative so we will be fixing it here


    ncaa_clean.set_index("Name",inplace = True)

    #fixing Chris clemons
    ncaa_clean.loc["Chris Clemons","Height"] = 69
    ncaa_clean.loc["Chris Clemons","Weight"] = 180
    ncaa_clean.loc["Chris Clemons","PPG"] = 24.8
    ncaa_clean.loc["Chris Clemons","APG"] = 2.6
    ncaa_clean.loc["Chris Clemons","RPG"] = 4.5
    ncaa_clean.loc["Chris Clemons","SPG"] = 1.6
    ncaa_clean.loc["Chris Clemons","BPG"] = 0.4
    ncaa_clean.loc["Chris Clemons","TPG"] = 2.5
    ncaa_clean.loc["Chris Clemons","FGP"] = 0.444
    ncaa_clean.loc["Chris Clemons","3PG"] = 0.363
    #fixing Dennis Smith Jr.
    ncaa_clean.loc["Dennis Smith Jr.","Height"] = 75
    ncaa_clean.loc["Dennis Smith Jr.","Weight"] = 195
    ncaa_clean.loc["Dennis Smith Jr.","PPG"] = 18.1
    ncaa_clean.loc["Dennis Smith Jr.","APG"] = 6.2
    ncaa_clean.loc["Dennis Smith Jr.","RPG"] = 4.6
    ncaa_clean.loc["Dennis Smith Jr.","SPG"] = 1.9
    ncaa_clean.loc["Dennis Smith Jr.","BPG"] = 0.4
    ncaa_clean.loc["Dennis Smith Jr.","TPG"] = 3.4
    ncaa_clean.loc["Dennis Smith Jr.","FGP"] = 0.455
    ncaa_clean.loc["Dennis Smith Jr.","3PG"] = 0.359
    #fixing Frank Jackson
    ncaa_clean.loc["Frank Jackson","Height"] = 75
    ncaa_clean.loc["Frank Jackson","Weight"] = 205
    ncaa_clean.loc["Frank Jackson","PPG"] = 10.9
    ncaa_clean.loc["Frank Jackson","APG"] = 1.7
    ncaa_clean.loc["Frank Jackson","RPG"] = 2.5
    ncaa_clean.loc["Frank Jackson","SPG"] = 0.6
    ncaa_clean.loc["Frank Jackson","BPG"] = 0.1
    ncaa_clean.loc["Frank Jackson","TPG"] = 1.4
    ncaa_clean.loc["Frank Jackson","FGP"] = 0.473
    ncaa_clean.loc["Frank Jackson","3PG"] = 0.395
    #fixing Gary Payton II
    ncaa_clean.loc["Gary Payton II","Height"] = 75
    ncaa_clean.loc["Gary Payton II","Weight"] = 175
    ncaa_clean.loc["Gary Payton II","PPG"] = 14.7
    ncaa_clean.loc["Gary Payton II","APG"] = 4.1
    ncaa_clean.loc["Gary Payton II","RPG"] = 7.7
    ncaa_clean.loc["Gary Payton II","SPG"] = 2.8
    ncaa_clean.loc["Gary Payton II","BPG"] = 0.8
    ncaa_clean.loc["Gary Payton II","TPG"] = 2.1
    ncaa_clean.loc["Gary Payton II","FGP"] = 0.485
    ncaa_clean.loc["Gary Payton II","3PG"] = 0.302
    #fixing Gary Trent Jr.
    ncaa_clean.loc["Gary Trent Jr.","Height"] = 78
    ncaa_clean.loc["Gary Trent Jr.","Weight"] = 209
    ncaa_clean.loc["Gary Trent Jr.","PPG"] = 14.5
    ncaa_clean.loc["Gary Trent Jr.","APG"] = 1.4
    ncaa_clean.loc["Gary Trent Jr.","RPG"] = 4.2
    ncaa_clean.loc["Gary Trent Jr.","SPG"] = 1.2
    ncaa_clean.loc["Gary Trent Jr.","BPG"] = 0.1
    ncaa_clean.loc["Gary Trent Jr.","TPG"] = 1.0
    ncaa_clean.loc["Gary Trent Jr.","FGP"] = 0.415
    ncaa_clean.loc["Gary Trent Jr.","3PG"] = 0.402
    #fixingGlenn Robinson III
    ncaa_clean.loc["Glenn Robinson III","Height"] = 78
    ncaa_clean.loc["Glenn Robinson III","Weight"] = 220
    ncaa_clean.loc["Glenn Robinson III","PPG"] = 12.0
    ncaa_clean.loc["Glenn Robinson III","APG"] = 1.0
    ncaa_clean.loc["Glenn Robinson III","RPG"] = 4.9
    ncaa_clean.loc["Glenn Robinson III","SPG"] = 1.0
    ncaa_clean.loc["Glenn Robinson III","BPG"] = 0.3
    ncaa_clean.loc["Glenn Robinson III","TPG"] = 1.0
    ncaa_clean.loc["Glenn Robinson III","FGP"] = 0.525
    ncaa_clean.loc["Glenn Robinson III","3PG"] = 0.313
    #fixing Jamal Crawford
    ncaa_clean.loc["Jamal Crawford","RPG"] = 2.8
    #fixing Jaren Jackson Jr.
    ncaa_clean.loc["Jaren Jackson Jr.","Height"] = 83
    ncaa_clean.loc["Jaren Jackson Jr.","Weight"] = 242
    ncaa_clean.loc["Jaren Jackson Jr.","PPG"] = 10.9
    ncaa_clean.loc["Jaren Jackson Jr.","RPG"] = 5.8
    ncaa_clean.loc["Jaren Jackson Jr.","APG"] = 1.1
    ncaa_clean.loc["Jaren Jackson Jr.","SPG"] = 0.6
    ncaa_clean.loc["Jaren Jackson Jr.","BPG"] = 3.0
    ncaa_clean.loc["Jaren Jackson Jr.","TPG"] = 1.8
    ncaa_clean.loc["Jaren Jackson Jr.","FGP"] = 0.513
    ncaa_clean.loc["Jaren Jackson Jr.","3PG"] = 0.396
    #fixing Kevin Porter Jr.
    ncaa_clean.loc["Kevin Porter Jr.","Height"] = 78
    ncaa_clean.loc["Kevin Porter Jr.","Weight"] = 218
    ncaa_clean.loc["Kevin Porter Jr.","PPG"] = 9.5
    ncaa_clean.loc["Kevin Porter Jr.","RPG"] = 4.0
    ncaa_clean.loc["Kevin Porter Jr.","APG"] = 1.4
    ncaa_clean.loc["Kevin Porter Jr.","SPG"] = 0.8
    ncaa_clean.loc["Kevin Porter Jr.","BPG"] = 0.5
    ncaa_clean.loc["Kevin Porter Jr.","TPG"] = 1.9
    ncaa_clean.loc["Kevin Porter Jr.","FGP"] = 0.471
    ncaa_clean.loc["Kevin Porter Jr.","3PG"] = 0.412
    #fixing Larry Nance Jr.
    ncaa_clean.loc["Larry Nance Jr.","Height"] = 80
    ncaa_clean.loc["Larry Nance Jr.","Weight"] = 235
    ncaa_clean.loc["Larry Nance Jr.","PPG"] = 11.3
    ncaa_clean.loc["Larry Nance Jr.","RPG"] = 6.6
    ncaa_clean.loc["Larry Nance Jr.","APG"] = 1.4
    ncaa_clean.loc["Larry Nance Jr.","SPG"] = 1.1
    ncaa_clean.loc["Larry Nance Jr.","BPG"] = 1.1
    ncaa_clean.loc["Larry Nance Jr.","TPG"] = 1.6
    ncaa_clean.loc["Larry Nance Jr.","FGP"] = 0.521
    ncaa_clean.loc["Larry Nance Jr.","3PG"] = 0.308
    #fixing Reggie Bullock
    ncaa_clean.loc["Reggie Bullock","Height"] = 79
    ncaa_clean.loc["Reggie Bullock","Weight"] = 205
    ncaa_clean.loc["Reggie Bullock","PPG"] = 9.9
    ncaa_clean.loc["Reggie Bullock","RPG"] = 5.0
    ncaa_clean.loc["Reggie Bullock","APG"] = 1.7
    ncaa_clean.loc["Reggie Bullock","SPG"] = 1.3
    ncaa_clean.loc["Reggie Bullock","BPG"] = 0.3
    ncaa_clean.loc["Reggie Bullock","TPG"] = 0.9
    ncaa_clean.loc["Reggie Bullock","FGP"] = 0.439
    ncaa_clean.loc["Reggie Bullock","3PG"] = 0.387
    #fixing Tim Hardaway Jr.
    ncaa_clean.loc["Tim Hardaway Jr.","Height"] = 78
    ncaa_clean.loc["Tim Hardaway Jr.","Weight"] = 205
    ncaa_clean.loc["Tim Hardaway Jr.","PPG"] = 14.3
    ncaa_clean.loc["Tim Hardaway Jr.","RPG"] = 4.1
    ncaa_clean.loc["Tim Hardaway Jr.","APG"] = 2.1
    ncaa_clean.loc["Tim Hardaway Jr.","SPG"] = 0.7
    ncaa_clean.loc["Tim Hardaway Jr.","BPG"] = 0.3
    ncaa_clean.loc["Tim Hardaway Jr.","TPG"] = 1.7
    ncaa_clean.loc["Tim Hardaway Jr.","FGP"] = 0.425
    ncaa_clean.loc["Tim Hardaway Jr.","3PG"] = 0.343
    #fixing Troy Brown Jr.
    ncaa_clean.loc["Troy Brown Jr.","Height"] = 78
    ncaa_clean.loc["Troy Brown Jr.","Weight"] = 205
    ncaa_clean.loc["Troy Brown Jr.","PPG"] = 14.3
    ncaa_clean.loc["Troy Brown Jr.","RPG"] = 4.1
    ncaa_clean.loc["Troy Brown Jr.","APG"] = 2.1
    ncaa_clean.loc["Troy Brown Jr.","SPG"] = 0.7
    ncaa_clean.loc["Troy Brown Jr.","BPG"] = 0.3
    ncaa_clean.loc["Troy Brown Jr.","TPG"] = 1.7
    ncaa_clean.loc["Troy Brown Jr.","FGP"] = 0.425
    ncaa_clean.loc["Troy Brown Jr.","3PG"] = 0.343
    #fixing Tyler Johnson
    ncaa_clean.loc["Tyler Johnson","Height"] = 76
    ncaa_clean.loc["Tyler Johnson","Weight"] = 186
    ncaa_clean.loc["Tyler Johnson","PPG"] = 10.5
    ncaa_clean.loc["Tyler Johnson","RPG"] = 4.8
    ncaa_clean.loc["Tyler Johnson","APG"] = 2.4
    ncaa_clean.loc["Tyler Johnson","SPG"] = 1.1
    ncaa_clean.loc["Tyler Johnson","BPG"] = 0.3
    ncaa_clean.loc["Tyler Johnson","TPG"] = 1.4
    ncaa_clean.loc["Tyler Johnson","FGP"] = 0.456
    ncaa_clean.loc["Tyler Johnson","3PG"] = 0.372
    #fixing Tyler Johnson
    ncaa_clean.loc["Tyler Johnson","Height"] = 76
    ncaa_clean.loc["Tyler Johnson","Weight"] = 186
    ncaa_clean.loc["Tyler Johnson","PPG"] = 10.5
    ncaa_clean.loc["Tyler Johnson","RPG"] = 4.8
    ncaa_clean.loc["Tyler Johnson","APG"] = 2.4
    ncaa_clean.loc["Tyler Johnson","SPG"] = 1.1
    ncaa_clean.loc["Tyler Johnson","BPG"] = 0.3
    ncaa_clean.loc["Tyler Johnson","TPG"] = 1.4
    ncaa_clean.loc["Tyler Johnson","FGP"] = 0.456
    ncaa_clean.loc["Tyler Johnson","3PG"] = 0.372
    #fixing Vince Carter
    ncaa_clean.loc["Vince Carter","RPG"] = 4.5
    ncaa_clean.loc["Vince Carter","TPG"] = 1.3
    #fixing Wendell Carter Jr.
    ncaa_clean.loc["Wendell Carter Jr.","Height"] = 82
    ncaa_clean.loc["Wendell Carter Jr.","Weight"] = 259
    ncaa_clean.loc["Wendell Carter Jr.","PPG"] = 13.5
    ncaa_clean.loc["Wendell Carter Jr.","RPG"] = 9.1
    ncaa_clean.loc["Wendell Carter Jr.","APG"] = 2.0
    ncaa_clean.loc["Wendell Carter Jr.","SPG"] = 0.8
    ncaa_clean.loc["Wendell Carter Jr.","BPG"] = 2.1
    ncaa_clean.loc["Wendell Carter Jr.","TPG"] = 2.0
    ncaa_clean.loc["Wendell Carter Jr.","FGP"] = 0.561
    ncaa_clean.loc["Wendell Carter Jr.","3PG"] = 0.413

    #dropping Lou Willima since he came straight out of HS
    ncaa_clean.drop(index='Lou Williams',inplace = True)

    #replace all of the -999 to nan
    ncaa_clean = ncaa_clean.replace(-999.0, np.nan)
    # ncaa_clean



    ncaa_clean.reset_index(inplace = True)



    #exporting the csv files in case of mongo failure
    #nba_clean.to_csv('static/assets/data/NBA_Data.csv', index = False)
    #ncaa_clean.to_csv('static/assets/data/NCAA_Data.csv', index = False)
    #nba_location.to_csv('static/assets/data/NBA_Location.csv', index = False)




    #Now putting these dataframes into local mongo database. So that app.py flask app can pull the data from Mongo and store them as csv's



    import pymongo

    conn = 'mongodb+srv://sabu:[email protected]/test'
    client = pymongo.MongoClient(conn)

    ncaa_clean = ncaa_clean.replace(np.nan, 0)

    # Declare the database
    db = client.NBA_NCAA
    db.NBA.drop()
    db.NCAA.drop()
    db.NBA_Location.drop()

    # Declare the collection
    NBA = db.NBA
    NCAA = db.NCAA
    NBA_Location = db.NBA_Location

    #Convert the NBA dataframe into a dictionary and insert into Mongo
    NBA_dict = nba_clean.to_dict('records')
    NBA.insert_many(NBA_dict)

    #Convert the NCAAl dataframe into a dictionary and insert into Mongo.

    NCAA_dict = ncaa_clean.to_dict('records')
    NCAA.insert_many(NCAA_dict)

    #Convert the NCAAl dataframe into a dictionary and insert into Mongo.
    NBA_Location_dict = nba_location.to_dict('records')
    NBA_Location.insert_many(NBA_Location_dict)

    print("Done. Data stored in Mongo")
コード例 #6
0
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from xgboost import XGBRegressor
from sportsreference.nba.roster import Roster, Player
from sportsreference.nba.player import AbstractPlayer
from sportsreference.nba.teams import Teams
from sklearn.metrics import r2_score, mean_squared_error

teams = Teams()
teamabbs = []
squadnames = []
for team in teams:
    teamabbs.append(team.abbreviation)
for abb in teamabbs:
    squad = Roster(abb, slim=True)
    squaddict = squad.players
    squadnames.append(list(squaddict.values()))
mergednames = list(itertools.chain.from_iterable(squadnames))

rawpast = pd.read_csv(
    "https://raw.githubusercontent.com/Build-Week-NBA-Longevity-Predictor/Data-Science/master/1976_to_2015_Draftees_edit2.csv"
)
rawpast = rawpast[rawpast['Yrs'] != 0]
past = rawpast[~rawpast['Player'].isin(mergednames)]

print("Train Test Split")
train, val = train_test_split(past,
                              train_size=0.80,
                              test_size=0.20,
                              random_state=42)
コード例 #7
0
 def test_bad_url_raises_value_error(self, *args, **kwargs):
     with pytest.raises(ValueError):
         roster = Roster('BAD')