Esempio n. 1
0
class Backtester:

    # Helper objects; both start out unset and are replaced in __init__ with
    # instances built for the chosen league.
    odds_calculator = None
    universal = None

    # NOTE(review): these lists are class-level, so they are shared by every
    # instance until rebound. __init__ rebinds league_teams; nothing in the
    # visible part of the class assigns user_agents - confirm it is still used.
    user_agents = []
    league_teams = []

    # League to backtest: can be nba, nhl, nfl or mlb. This is only the
    # default; __init__ overwrites it with the lower-cased constructor argument.
    league = "nba"

    # Per-league count, presumably the regulation periods of a game
    # (quarters / periods / innings) - TODO confirm against the code that reads it.
    num_periods = {'nba': 4, 'nhl': 3, 'nfl': 4, 'mlb': 9}

    #algo_version = "Algo_V1" or "Algo_V2"
    def __init__(self, league, algo_version):
        """Set up a backtester for one league and one algorithm version.

        Args:
            league: League name; it is lower-cased before being stored and
                before being handed to the helper objects.
            algo_version: Stored unchanged. The backtest methods of this class
                compare it against "Algo_V1" and "Algo_V2".
        """
        league_key = league.lower()

        self.league = league_key
        self.algo_version = algo_version

        # Both helpers are built for the same league; the team list is then
        # loaded through the freshly created Universal_Functions helper.
        self.odds_calculator = Odds_Calculator(league_key)
        self.universal = Universal_Functions(league_key)
        self.league_teams = self.universal.load_league_teams()

    #backtests algorithm for games played on certain days
    #goes from start_date to end_date one day at a time during the season, and analyzes games played on those days
    #each day takes about 4 seconds on my desktop
    def backtest_csv_output(self, start_date, end_date):

        #breaks up date
        temp = start_date.split("-")
        month = int(temp[0])
        day = int(temp[1])
        year = int(temp[2])

        cur_date = start_date

        content = []
        #this is actually a do-while loop
        while True:
            games = self.universal.get_games(cur_date)
            print("date: " + cur_date)

            for game in games:
                print("  Game: " + str(game))

            to_save = []
            to_save.append([
                cur_date, "Away", "Home", "Algo points", "Proj winner",
                "Winner", "", "", "Score"
            ])
            for x in range(0, len(games)):

                #team might not exist anymore, so don't count that game
                if len(games[x]['team1']) != 0 and len(games[x]['team2']) != 0:

                    data1 = self.universal.load_data(games[x]['team1'],
                                                     games[x]['date'])
                    data2 = self.universal.load_data(games[x]['team2'],
                                                     games[x]['date'])
                    # print("Teams: "+str(games[x]['team1'])+" | "+str(games[x]['team2']))
                    # print("date: "+str(games[x]['date']))
                    # print(data1)
                    # print(data1[0]['dates'][0])
                    # print(data1[0]['dates'][-1])
                    returned1 = self.odds_calculator.analyze2(
                        games[x]['team1'], games[x]['team2'], data1, "away")
                    returned2 = self.odds_calculator.analyze2(
                        games[x]['team2'], games[x]['team1'], data2, "home")

                    # print(returned1)
                    # print()
                    # print(returned2)
                    # print()

                    algo = Algo(self.league)
                    if self.algo_version == "Algo_V1":
                        algo_data = algo.calculate(games[x]['date'], returned1,
                                                   returned2)
                    elif self.algo_version == "Algo_V2":
                        algo_data = algo.calculate_V2(games[x]['date'],
                                                      returned1, returned2)
                    total = algo_data['total']

                    to_add = []
                    to_add.append("")
                    to_add.append(games[x]['team1'][0])
                    to_add.append(games[x]['team2'][0])
                    to_add.append(total)

                    if self.algo_version == "Algo_V1":
                        #categorizes odds (points)
                        levels = []
                        levels.append([0, 3])
                        levels.append([3, 6])
                        levels.append([6, 9])
                        levels.append([9, 12])
                        levels.append([12, 15])
                        levels.append([15, 18])
                        levels.append([18, 21])
                        levels.append([21, 24])
                        levels.append([24, 27])
                        levels.append([27, 100])
                    elif self.algo_version == "Algo_V2":
                        #categorizes odds (percentage)
                        levels = []
                        levels.append([50, 55])
                        levels.append([55, 60])
                        levels.append([60, 65])
                        levels.append([65, 70])
                        levels.append([70, 75])
                        levels.append([75, 80])
                        levels.append([80, 85])
                        levels.append([85, 90])
                        levels.append([90, 95])
                        levels.append([95, 100])

                    level = 0
                    for y in range(0, len(levels)):
                        if (total >= levels[y][0] and total < levels[y][1]
                            ) or (total * -1 >= levels[y][0]
                                  and total * -1 < levels[y][1]):
                            level = y + 1

                    #appends projected team
                    if total > 0:
                        to_add.append(games[x]['team1'][0])
                    elif total <= 0:
                        to_add.append(games[x]['team2'][0])

                    #appends winning team
                    if games[x]['game_scores'][0] > games[x]['game_scores'][1]:
                        to_add.append(games[x]['team1'][0])
                    else:
                        to_add.append(games[x]['team2'][0])

                    #appends score
                    score = str(games[x]['game_scores'][0]) + "-" + str(
                        games[x]['game_scores'][1])
                    to_add.append(score)

                    # #appends algo data
                    # if to_add[-2]==to_add[-3]:
                    # 	to_add.append("")
                    # 	if self.algo_version=="Algo_V1":
                    # 		to_add.append(str(level))
                    # 	elif self.algo_version=="Algo_V2":
                    # 		to_add.append(total)
                    # elif to_add[-3]!="":
                    # 	if self.algo_version=="Algo_V1":
                    # 		to_add.append(str(level))
                    # 	elif self.algo_version=="Algo_V2":
                    # 		to_add.append(total)
                    # 	to_add.append("")
                    # else:
                    # 	to_add.append("")
                    # 	to_add.append("")

                    #appends algo data
                    if to_add[-2] == to_add[-3]:
                        to_add.append("")
                        to_add.append(str(level))
                    elif to_add[-3] != "":
                        to_add.append(str(level))
                        to_add.append("")
                    else:
                        to_add.append("")
                        to_add.append("")

                    #appends betting odds
                    if self.algo_version == "Algo_V1":
                        odds_calculator = Odds_Calculator(self.league)
                        odds = odds_calculator.get_odds(total)
                    elif self.algo_version == "Algo_V2":
                        odds = abs(total)

                    favorable_odds = (100 / (100 - abs(odds)) - 1) * 100
                    underdog_odds = (100 / (100 - abs(odds)) - 1) * 100
                    if total > 0:
                        to_add.append("-" + str(favorable_odds))
                        to_add.append("+" + str(underdog_odds))
                    else:
                        to_add.append("+" + str(underdog_odds))
                        to_add.append("-" + str(favorable_odds))

                    to_save.append(to_add)

            #space between data
            to_save.append(["", "", "", "", "", "", "", "", ""])

            #only saves day's games if there were actually games on that day
            if len(to_save) > 2:
                content.append(to_save)

            #breaks loop to act like do-while
            if cur_date == end_date:
                break

            day += 1
            if day > 31:
                month += 1
                day = 1

            #doesn't increment year since the season's year doesn't change
            if month > 12:
                month = 1
                day = 1

            #increments season at the end of the season to sometime in the middle
            if self.league == "nba":
                if "4-1-" in cur_date:
                    year += 1
                    month = 2
                    day = 1
            elif self.league == "nhl":
                if "4-1-" in cur_date:
                    year += 1
                    month = 2
                    day = 1
            elif self.league == "mlb":
                if "10-1-" in cur_date:
                    year += 1
                    month = 7
                    day = 1

            cur_date = str(month) + "-" + str(day) + "-" + str(year)

        #has most recent games first
        content.reverse()

        to_save = []
        for x in range(0, len(content)):
            for y in range(0, len(content[x])):
                to_save.append(content[x][y])

        if start_date != end_date:
            self.universal.save_to_csv(
                "./" + str(self.league) + "/analyze/" + str(self.league) +
                "_" + str(self.algo_version) + "_" + str(start_date) + "_" +
                str(end_date) + "_analysis.csv", to_save)
        else:
            self.universal.save_to_csv(
                "./" + str(self.league) + "/analyze/" + str(self.league) +
                "_" + str(self.algo_version) + "_" + str(end_date) +
                "_analysis.csv", to_save)

    #backtests algo_V2 for games played on each day
    #goes from start_date to end_date one day at a time during the season, runs algo_V2 on those days, compares projected odds versus oddsportal odds, and simulates betting
    # each day takes about 4 seconds on my desktop
    def backtest_odds(self, start_date, end_date):

        #breaks up date
        temp = start_date.split("-")
        month = int(temp[0])
        day = int(temp[1])
        year = int(temp[2])

        cur_date = start_date

        content = []
        #this is actually a do-while loop
        while True:
            games = self.universal.get_games(cur_date)
            print("date: " + cur_date)

            for game in games:
                print("  Game: " + str(game))

            # - Strategy 0.0: Bet on algo's projected winner, no matter the odds.
            # - Strategy 0.1: Bet on oddsmaker's projected winner, no matter the odds.
            # All below strategies incorporate placing a bet if the algorithm projects a team to win more often than the oddsmaker projects
            # - Strategy 1: Default strategy.
            # - Strategy 2: Placing a bet if that team is also the algo's favorite.
            # - Strategy 3: Placing a bet if that team is the algo's favorite, and the oddsmaker's underdog.
            # - Strategy 4: Placing a bet if the difference between the algorithm's projected odds and the oddsmaker's odds is also >= 45

            to_save = []
            strat00 = {"total_bet": 0, "total_win": 0}
            strat01 = {"total_bet": 0, "total_win": 0}
            strat1 = {"total_bet": 0, "total_win": 0}
            strat2 = {"total_bet": 0, "total_win": 0}
            strat3 = {"total_bet": 0, "total_win": 0}
            strat4 = {"total_bet": 0, "total_win": 0}
            for x in range(0, len(games)):

                #team might not exist anymore, so don't count that game
                if len(games[x]['team1']) != 0 and len(games[x]['team2']) != 0:

                    data1 = self.universal.load_data(games[x]['team1'],
                                                     games[x]['date'])
                    data2 = self.universal.load_data(games[x]['team2'],
                                                     games[x]['date'])

                    returned1 = self.odds_calculator.analyze2(
                        games[x]['team1'], games[x]['team2'], data1, "away")
                    returned2 = self.odds_calculator.analyze2(
                        games[x]['team2'], games[x]['team1'], data2, "home")

                    # print(returned1)
                    # print()
                    # print(returned2)
                    # print()

                    algo = Algo(self.league)
                    algo_data = algo.calculate_V2(games[x]['date'], returned1,
                                                  returned2)
                    total = algo_data['total']

                    # to_return={}
                    # to_return['record_points']=             odds['records']
                    # to_return['home_away_points']=          odds['home_away']
                    # to_return['home_away_10_games_points']= odds['home_away_10_games']
                    # to_return['last_10_games_points']=      odds['last_10_games']
                    # to_return['avg_points']=                odds['avg_points']
                    # to_return['avg_points_10_games']=       odds['avg_points_10_games']
                    # # to_return['win_streak']=                win_streak
                    # to_return['win_streak_home_away']=      odds['win_streak_home_away']
                    # to_return['total']=                     self.universal.convert_number(average)

                    odds = abs(total)

                    favorable_odds = round((100 / (100 - abs(odds)) - 1) * 100)
                    underdog_odds = round((100 / (100 - abs(odds)) - 1) * 100)

                    # print(str(year)+" | "+str(games[x]['team1'])+" | "+str(games[x]['team2'])+" | "+str(games[x]['game_scores']))

                    oddsportal_odds = self.universal.get_odds_game(
                        year, games[x]['team1'], games[x]['team2'],
                        games[x]['game_scores'])

                    if oddsportal_odds[0] != 0:
                        to_add = []
                        #date
                        to_add.append(cur_date)
                        #away
                        to_add.append(games[x]['team1'][1])
                        #home
                        to_add.append(games[x]['team2'][1])
                        #Algo Proj
                        to_add.append(str(total) + "%")
                        #Away proj
                        away_proj = 0
                        if total < 0:
                            away_proj = favorable_odds
                        else:
                            away_proj = underdog_odds * -1
                        to_add.append(away_proj)
                        #Home proj
                        home_proj = 0
                        if total > 0:
                            home_proj = favorable_odds
                        else:
                            home_proj = underdog_odds * -1
                        to_add.append(home_proj)
                        #Away odds
                        to_add.append(oddsportal_odds[0])
                        #Home odds
                        to_add.append(oddsportal_odds[1])
                        #Diff Away
                        away_diff = 0
                        if abs(away_proj - oddsportal_odds[0]) > 200:
                            away_diff = abs(away_proj -
                                            oddsportal_odds[0]) - 200
                        else:
                            away_diff = abs(away_proj - oddsportal_odds[0])
                        to_add.append(away_diff)
                        #Diff Home
                        home_diff = 0
                        if abs(home_proj - oddsportal_odds[1]) > 200:
                            home_diff = abs(home_proj -
                                            oddsportal_odds[1]) - 200
                        else:
                            home_diff = abs(home_proj - oddsportal_odds[1])
                        to_add.append(home_diff)

                        ## Strategy 0.0 ##
                        if away_proj < 0:
                            #Bet
                            to_add.append("$100")
                            strat00['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[0] > 0):
                                to_win = 100 * (oddsportal_odds[0] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[0] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][0] > games[x][
                                    'game_scores'][1]:
                                to_add.append("$" + str(100 + to_win))
                                strat00['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        else:
                            #Bet
                            to_add.append("$100")
                            strat00['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[1] > 0):
                                to_win = 100 * (oddsportal_odds[1] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[1] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][1] > games[x][
                                    'game_scores'][0]:
                                to_add.append("$" + str(100 + to_win))
                                strat00['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")

                        ## Strategy 0.1 ##
                        if oddsportal_odds[0] < 0 and oddsportal_odds[
                                0] < oddsportal_odds[1]:
                            #Bet
                            to_add.append("$100")
                            strat01['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[0] > 0):
                                to_win = 100 * (oddsportal_odds[0] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[0] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][0] > games[x][
                                    'game_scores'][1]:
                                to_add.append("$" + str(100 + to_win))
                                strat01['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        else:
                            #Bet
                            to_add.append("$100")
                            strat01['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[1] > 0):
                                to_win = 100 * (oddsportal_odds[1] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[1] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][1] > games[x][
                                    'game_scores'][0]:
                                to_add.append("$" + str(100 + to_win))
                                strat01['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")

                        ## Strategy 1 ##
                        if oddsportal_odds[0] > away_proj:
                            #Bet
                            to_add.append("$100")
                            strat1['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[0] > 0):
                                to_win = 100 * (oddsportal_odds[0] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[0] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][0] > games[x][
                                    'game_scores'][1]:
                                to_add.append("$" + str(100 + to_win))
                                strat1['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        elif oddsportal_odds[1] > home_proj:
                            #Bet
                            to_add.append("$100")
                            strat1['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[1] > 0):
                                to_win = 100 * (oddsportal_odds[1] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[1] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][1] > games[x][
                                    'game_scores'][0]:
                                to_add.append("$" + str(100 + to_win))
                                strat1['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        else:
                            to_add.append("")
                            to_add.append("")
                            to_add.append("")

                        ## Strategy 2 ##
                        if oddsportal_odds[0] > away_proj and away_proj < 0:
                            #Bet
                            to_add.append("$100")
                            strat2['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[0] > 0):
                                to_win = 100 * (oddsportal_odds[0] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[0] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][0] > games[x][
                                    'game_scores'][1]:
                                to_add.append("$" + str(100 + to_win))
                                strat2['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        elif oddsportal_odds[1] > home_proj and home_proj < 0:
                            #Bet
                            to_add.append("$100")
                            strat2['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[1] > 0):
                                to_win = 100 * (oddsportal_odds[1] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[1] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][1] > games[x][
                                    'game_scores'][0]:
                                to_add.append("$" + str(100 + to_win))
                                strat2['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        else:
                            to_add.append("")
                            to_add.append("")
                            to_add.append("")

                        ## Strategy 3 ##
                        if oddsportal_odds[
                                0] > away_proj and away_proj < 0 and oddsportal_odds[
                                    0] > 0:
                            #Bet
                            to_add.append("$100")
                            strat3['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[0] > 0):
                                to_win = 100 * (oddsportal_odds[0] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[0] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][0] > games[x][
                                    'game_scores'][1]:
                                to_add.append("$" + str(100 + to_win))
                                strat3['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        elif oddsportal_odds[
                                1] > home_proj and home_proj < 0 and oddsportal_odds[
                                    1] > 0:
                            #Bet
                            to_add.append("$100")
                            strat3['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[1] > 0):
                                to_win = 100 * (oddsportal_odds[1] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[1] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][1] > games[x][
                                    'game_scores'][0]:
                                to_add.append("$" + str(100 + to_win))
                                strat3['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        else:
                            to_add.append("")
                            to_add.append("")
                            to_add.append("")

                        ## Strategy 4 ##
                        if self.league == "mlb":
                            diff_amount = 45
                        elif self.league == "nba":
                            diff_amount = 100

                        if oddsportal_odds[
                                0] > away_proj and away_diff >= diff_amount:
                            #Bet
                            to_add.append("$100")
                            strat4['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[0] > 0):
                                to_win = 100 * (oddsportal_odds[0] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[0] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][0] > games[x][
                                    'game_scores'][1]:
                                to_add.append("$" + str(100 + to_win))
                                strat4['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        elif oddsportal_odds[
                                1] > home_proj and home_diff >= diff_amount:
                            #Bet
                            to_add.append("$100")
                            strat4['total_bet'] += 100
                            #To Win
                            to_win = 0
                            if (oddsportal_odds[1] > 0):
                                to_win = 100 * (oddsportal_odds[1] / 100)
                            else:
                                to_win = 100 / (oddsportal_odds[1] * -1 / 100)
                            to_add.append("$" + str(to_win))
                            #Won
                            if games[x]['game_scores'][1] > games[x][
                                    'game_scores'][0]:
                                to_add.append("$" + str(100 + to_win))
                                strat4['total_win'] += (100 + to_win)
                            else:
                                to_add.append("$0")
                        else:
                            to_add.append("")
                            to_add.append("")
                            to_add.append("")

                    else:
                        to_add = []

                    # #appends winning team
                    # if games[x]['game_scores'][0]>games[x]['game_scores'][1]:
                    # 	to_add.append(games[x]['team1'][0])
                    # else:
                    # 	to_add.append(games[x]['team2'][0])

                    # #appends score
                    # score=str(games[x]['game_scores'][0])+"-"+str(games[x]['game_scores'][1])
                    # to_add.append(score)

                    if len(to_add) != 0:
                        to_save.append(to_add)

            # to_save.append(["Date", "Away", "Home", "Algo proj", "Away proj", "Home proj", "Away odds", "Home odds", "Diff away", "Diff home", "Bet", "To win", "Won"])

            #only saves day's games if there were actually games on that day
            if len(to_save) > 2:

                #summary
                strat00_profit = strat00['total_win'] - strat00['total_bet']
                strat00_perc = strat00_profit / strat00['total_bet'] * 100
                strat01_profit = strat01['total_win'] - strat01['total_bet']
                strat01_perc = strat01_profit / strat01['total_bet'] * 100
                strat1_profit = strat1['total_win'] - strat1['total_bet']
                strat1_perc = strat1_profit / strat1['total_bet'] * 100
                strat2_profit = strat2['total_win'] - strat2['total_bet']
                if (strat2['total_bet'] > 0):
                    strat2_perc = strat2_profit / strat2['total_bet'] * 100
                else:
                    strat2_perc = 0
                strat3_profit = strat3['total_win'] - strat3['total_bet']
                if (strat3['total_bet'] > 0):
                    strat3_perc = strat3_profit / strat3['total_bet'] * 100
                else:
                    strat3_perc = 0
                strat4_profit = strat4['total_win'] - strat4['total_bet']
                if (strat4['total_bet'] > 0):
                    strat4_perc = strat4_profit / strat4['total_bet'] * 100
                else:
                    strat4_perc = 0

                #initializes with buffer columns
                summary = ["", "", "", "", "", "", "", "", "", ""]
                summary.append("$" + str(strat00['total_bet']))
                summary.append("$" + str(strat00_profit))
                summary.append(str(strat00_perc) + "%")
                summary.append("$" + str(strat01['total_bet']))
                summary.append("$" + str(strat01_profit))
                summary.append(str(strat01_perc) + "%")
                summary.append("$" + str(strat1['total_bet']))
                summary.append("$" + str(strat1_profit))
                summary.append(str(strat1_perc) + "%")
                summary.append("$" + str(strat2['total_bet']))
                summary.append("$" + str(strat2_profit))
                summary.append(str(strat2_perc) + "%")
                summary.append("$" + str(strat3['total_bet']))
                summary.append("$" + str(strat3_profit))
                summary.append(str(strat3_perc) + "%")
                summary.append("$" + str(strat4['total_bet']))
                summary.append("$" + str(strat4_profit))
                summary.append(str(strat4_perc) + "%")
                to_save.append(summary)

                #space between data
                to_save.append([
                    "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
                    "", "", "", "", "", "", "", "", "", ""
                ])

                content.append(to_save)

            #breaks loop to act like do-while
            if cur_date == end_date:
                break

            day += 1
            if day > 31:
                month += 1
                day = 1

            #doesn't increment year since the season's year doesn't change
            if month > 12:
                month = 1
                day = 1

            #increments season at the end of the season to sometime in the middle
            if self.league == "nba":
                # if "4-1-" in cur_date:
                # 	year+=1
                # 	month=2
                # 	day=1
                if "4-1-" in cur_date:
                    year += 1
                    month = 1
                    day = 15
            elif self.league == "nhl":
                if "4-1-" in cur_date:
                    year += 1
                    month = 2
                    day = 1
            elif self.league == "mlb":
                if "10-1-" in cur_date:
                    year += 1
                    month = 7
                    day = 1

            cur_date = str(month) + "-" + str(day) + "-" + str(year)

        # #has most recent games first
        # content.reverse()

        to_save = []
        to_save.append([
            "Date", "Away", "Home", "Algo proj", "Away proj", "Home proj",
            "Away odds", "Home odds", "Diff away", "Diff home", "Bet",
            "To win", "Won", "Bet", "To win", "Won", "Bet", "To win", "Won",
            "Bet", "To win", "Won"
        ])
        for x in range(0, len(content)):
            for y in range(0, len(content[x])):
                to_save.append(content[x][y])

        if start_date != end_date:
            self.universal.save_to_csv(
                "./" + str(self.league) + "/analyze/" + str(self.league) +
                "_Algo_V2_" + str(start_date) + "_" + str(end_date) +
                "_backtest_odds.csv", to_save)
        else:
            self.universal.save_to_csv(
                "./" + str(self.league) + "/analyze/" + str(self.league) +
                "_Algo_V2_" + str(end_date) + "_backtest_odds.csv", to_save)

    #backtests algorithm for games played on certain days
    def backtest_algo(self, start_date, end_date, algo):
        """Backtest ``algo`` on every date from start_date through end_date.

        start_date / end_date: "M-D-YYYY" strings. end_date has to be a date
            that the day-by-day stepping below actually produces, otherwise
            the date loop never terminates.
        algo: algorithm definition, handed unchanged to backtest_algo2().

        The date list is shared between 16 worker processes (one call to
        backtest_algo2() each). Every worker leaves its per-level loss/win
        counts in a temporary CSV; those files are summed here, deleted, and
        the totals are written to a text report under
        ./<league>/analyze/backtests/. Returns immediately when that report
        already exists.
        """
        pieces = start_date.split("-")
        month, day, year = int(pieces[0]), int(pieces[1]), int(pieces[2])

        backtests_dir = "./" + str(self.league) + "/analyze/backtests/"
        algo_list = str(algo).replace("[", "").replace("]", "")
        algo_list = algo_list.replace(" ", "")

        report_path = (backtests_dir + str(self.algo_version) + "_" +
                       str(algo_list) + "_" + str(start_date) + "_" +
                       str(end_date) + ".txt")

        #a finished report means this backtest was already run
        if os.path.exists(report_path):
            return

        #league -> (marker found in the date being left, month to jump to, day to jump to)
        #once the marker date has been passed, the walk skips ahead into the next year's season
        season_jumps = {
            "nhl": ("4-1-", 2, 1),
            "nba": ("4-1-", 2, 1),
            "mlb": ("10-1-", 7, 1),
        }
        jump = season_jumps.get(self.league)

        #builds the list of dates to backtest, start_date and end_date included
        dates = [start_date]
        cur_date = start_date
        while cur_date != end_date:
            print("Cur date: " + str(cur_date) + " | End date: " +
                  str(end_date))

            #every month is treated as having 31 days
            day += 1
            if day > 31:
                month += 1
                day = 1

            #wrapping past December does not touch the year; only a season jump does
            if month > 12:
                month = 1
                day = 1

            if jump is not None and jump[0] in cur_date:
                year += 1
                month, day = jump[1], jump[2]

            cur_date = str(month) + "-" + str(day) + "-" + str(year)
            dates.append(cur_date)

        #must match the num_processes used by backtest_algo2 to slice the dates
        num_processes = 16

        def temp_csv_path(index):
            #file in which worker number `index` leaves its counts
            return (backtests_dir + str(algo_list) + "_temp" + str(index) +
                    ".csv")

        #a worker is only created when its result file is not already there
        workers = [
            Process(target=self.backtest_algo2, args=(index, dates, algo))
            for index in range(num_processes)
            if not os.path.exists(temp_csv_path(index))
        ]

        for worker in workers:
            worker.start()

        #waits until every worker has finished
        for worker in workers:
            worker.join()

        #workers cannot return values, so their counts come back through the temp files
        wins = [0] * 11
        losses = [0] * 11
        for index in range(num_processes):
            temp_path = temp_csv_path(index)
            rows = self.universal.read_from_csv(temp_path)

            #each row is [losses, wins] for the level equal to its row number
            for level, row in enumerate(rows):
                losses[level] += int(row[0])
                wins[level] += int(row[1])

            os.remove(temp_path)

        report = [str(algo), ""]

        total_wins = 0
        total_losses = 0

        #starts at 1 since levels start at 1
        for level in range(1, len(wins)):
            total_wins += wins[level]
            total_losses += losses[level]

            decided = wins[level] + losses[level]
            if decided != 0:
                perc_won = wins[level] / decided * 100
            else:
                perc_won = "N/A"

            report.append(
                str(level) + ": " + str(losses[level]) + " - " +
                str(wins[level]) + ": " + str(perc_won))

        report.append("")
        report.append(str(total_losses) + " - " + str(total_wins))

        self.universal.save_to_txt(report_path, report)

    #used in backtest_algo()
    def backtest_algo2(self, number, dates, algo):
        """Worker for backtest_algo(): backtest one slice of ``dates``.

        number: index of this worker; together with the fixed worker count of
            16 it selects which slice of ``dates`` is processed.
        dates: full list of date strings being backtested.
        algo: algorithm definition installed on a fresh Algo for every game.

        Every game is filed under a level taken from the size of the
        algorithm's total, and counted as a win when the projected winner
        matches the team with the higher score. The counts are written as
        [losses, wins] rows (row number == level) to
        ./<league>/analyze/backtests/<algo>_temp<number>.csv.
        """
        #has to agree with the worker count used in backtest_algo()
        num_processes = 16
        dates_per_worker = len(dates) / num_processes
        start = int(number * dates_per_worker)
        end = int((number + 1) * dates_per_worker)

        print()
        print(number)
        print(start)
        print(end)
        print()

        #level number corresponds to an index; index 0 collects totals outside every level
        wins = [0] * 11
        losses = [0] * 11

        for x in range(start, end):
            print("At " + str(dates[x]) + " END: " + str(dates[end - 1]))

            for game in self.universal.get_games(dates[x]):

                #team might not exist anymore, so don't count that game
                if len(game['team1']) == 0 or len(game['team2']) == 0:
                    continue

                away_team = game['team1']
                home_team = game['team2']
                game_date = game['date']

                data1 = self.universal.load_data(away_team, game_date)
                data2 = self.universal.load_data(home_team, game_date)
                returned1 = self.odds_calculator.analyze2(
                    away_team, home_team, data1, "away")
                returned2 = self.odds_calculator.analyze2(
                    home_team, away_team, data2, "home")

                algorithm = Algo(self.league)
                #sets algo to backtest
                algorithm.algorithm[self.league] = algo

                #levels are [lower bound, upper bound) pairs
                if self.algo_version == "Algo_V1":
                    algo_data = algorithm.calculate(game_date, returned1,
                                                    returned2)
                    #3-wide levels from 0 to 27, then one catch-all level up to 100
                    levels = [[low, low + 3] for low in range(0, 27, 3)]
                    levels.append([27, 100])
                elif self.algo_version == "Algo_V2":
                    algo_data = algorithm.calculate_V2(game_date, returned1,
                                                       returned2)
                    #5-wide levels from 50 to 100
                    levels = [[low, low + 5] for low in range(50, 100, 5)]
                total = algo_data['total']

                #the sign of total only says which team is favoured, so both signs are tested
                negated = total * -1
                level = 0
                for position, (low, high) in enumerate(levels, start=1):
                    if (low <= total < high) or (low <= negated < high):
                        level = position

                #0 is team1, and 1 is team2
                #a positive total projects team1; anything else goes with the home team
                if total > 0:
                    projected_team = 0
                elif total <= 0:
                    projected_team = 1

                #NOTE(review): the scores are compared exactly as get_games() returns them;
                #if they are strings this comparison is lexicographic -- confirm they are numeric
                scores = game['game_scores']
                winning_team = 0 if scores[0] > scores[1] else 1

                #if algo was right
                tally = wins if projected_team == winning_team else losses
                tally[level] += 1

        rows = [[lost, won] for lost, won in zip(losses, wins)]

        algo_list = str(algo).replace("[", "").replace("]", "")
        algo_list = algo_list.replace(" ", "")
        path = ("./" + str(self.league) + "/analyze/backtests/" +
                str(algo_list) + "_temp" + str(number) + ".csv")
        self.universal.save_to_csv(path, rows)
class ESPN_Scraper:
    """Scrapes ESPN schedule and game pages for one league.

    The attributes below are class-level defaults; __init__ replaces
    ``universal``, ``output``, ``output_path`` and ``league`` on each
    instance.
    """

    #not assigned in the visible part of the class -- presumably the URL opener used when fetching pages; TODO confirm
    opener = None
    #Universal_Functions helper, created in __init__
    universal = None
    #NOTE(review): presumably filled by initialize_user_agents() -- confirm
    user_agents = []

    #collected log lines and the text file path built for them in __init__
    output = []
    output_path = ""

    #can be nba, nhl, nfl, mlb
    league = "nba"

    def __init__(self, league):
        self.league = league.lower()
        self.universal = Universal_Functions(league.lower())
        self.initialize_user_agents()

        self.output = []
        today = self.universal.get_today()
        time = self.universal.get_current_time()
        #10:45 becomes 1045
        time = time.replace(":", "")
        self.output_path = "./" + league + "/output/espn_scraper_" + str(
            today['month']) + "-" + str(today['day']) + "-" + str(
                today['year']) + "_" + str(time) + ".txt"
        self.output.append("---- Start output ----")

    def update_data(self, team, year):
        """Scrape a team's game results and merge them into its stored CSV.

        team: sequence whose first item is the abbreviation and second item
            the name used in ESPN schedule URLs; the second item also names
            the CSV file.
        year: season to scrape, or "" to scrape every season returned by
            get_seasons().

        For each season the schedule page is scraped (two pages for mlb, one
        per half). Games whose date is not yet in the stored data have their
        period scores scraped and are appended. All stored games that have an
        opponent and real period scores are then written to
        ./<league>/team_data/<season>/<team[1]>.csv.
        """

        def schedule_url(host, season, tail):
            #schedule page address; `tail` is the part between the season and the team name
            return (host + str(self.league) + "/team/schedule/_/name/" +
                    str(team[0]) + "/year/" + str(season) + tail +
                    str(team[1]))

        #an empty year means every season listed on the site
        seasons = self.get_seasons(team) if year == "" else [year]

        for season in seasons:

            if self.league == "nhl":
                url = schedule_url("http://espn.com/", season, "/")
                data = self.scrape_game_scores(url)
            elif self.league == "nba":
                #seasontype of 2 refers to regular season, while 1 and 3 refer to pre and post season respectively
                url = schedule_url("http://espn.go.com/", season,
                                   "/seasontype/2/")
                data = self.scrape_game_scores(url)
            elif self.league == "mlb":
                #seasontype of 2 refers to regular season. MLB splits season into 2 halves
                url = schedule_url("http://espn.go.com/", season,
                                   "/seasontype/2/half/1/")
                url2 = schedule_url("http://espn.go.com/", season,
                                    "/seasontype/2/half/2/")

                data = self.scrape_game_scores(url)
                second_half = self.scrape_game_scores(url2)

                #second half's games go after the first half's
                for index in range(len(second_half['dates'])):
                    for key in ('dates', 'home_away', 'game_urls',
                                'game_scores'):
                        data[key].append(second_half[key][index])

            path = "./" + str(self.league) + "/team_data/" + str(
                season) + "/" + team[1] + ".csv"

            self.to_print("Loading existing data")
            #gets proper season/year of data
            existing_data = self.universal.load_data(team, "", season)
            for stored_season in existing_data:
                if str(stored_season['year']) == str(season):
                    existing_data = stored_season
                    break

            data['other_team'] = []
            data['period_scores'] = []

            for y, game_date in enumerate(data['dates']):

                #game data already stored (or added earlier in this loop) is not scraped again
                if game_date in existing_data['dates']:
                    continue

                game_url = data['game_urls'][y]

                self.to_print("Returned " + str(game_date) +
                              "'s game results")

                #goes to playbyplay page since it has the same info and loads more quickly
                game_period_url = game_url.replace("recap", "playbyplay")

                #scrapes for period score data
                self.to_print("Scraping " + str(game_date) +
                              "'s period data: " + str(game_period_url))
                period_data = self.scrape_period_data(team, url,
                                                      game_period_url)
                other_team = period_data['other_team']
                period_scores = period_data['scores']

                #an opponent of -1 marks a failed scrape; waits and tries one more time
                if period_data['other_team'] == -1:
                    self.to_print("Scraping " + str(game_date) +
                                  "'s period data again")
                    time.sleep(5)
                    period_data = self.scrape_period_data(
                        team, url, game_period_url)
                    other_team = period_data['other_team']
                    period_scores = period_data['scores']

                data['other_team'].append(other_team)
                #"105-98" becomes ["105", "98"]
                data['game_scores'][y] = data['game_scores'][y].split("-")

                existing_data['dates'].append(game_date)
                existing_data['other_team'].append(other_team)
                existing_data['home_away'].append(data['home_away'][y])
                existing_data['game_scores'].append(data['game_scores'][y])
                existing_data['period_scores'].append(period_scores)

                self.save_output()

            to_save = []
            for y in range(len(existing_data['game_scores'])):

                #skips games without an opponent or whose period scores are the -1 placeholder
                if existing_data['other_team'][y] == "":
                    continue
                if existing_data['period_scores'][y][0][0] == -1:
                    continue

                score = existing_data['game_scores'][y]

                row = [
                    str(existing_data['dates'][y]),
                    existing_data['other_team'][y],
                    existing_data['home_away'][y],
                    score[0],
                    score[1],
                ]
                #first list of period scores, then the second
                row.extend(existing_data['period_scores'][y][0])
                row.extend(existing_data['period_scores'][y][1])

                to_save.append(row)

            self.to_print("Saving data to " + str(path))
            self.universal.save_to_csv(path, to_save)
        self.save_output()

    def update_player_data(self, team, year):
        """Scrape the box-score player stats of every game in a team's schedule.

        team: sequence whose first item is the abbreviation and second item
            the name used in ESPN schedule URLs.
        year: season to scrape, or "" to scrape every season returned by
            get_seasons().

        Nothing is returned; scrape_player_data() is called once per game and
        once more, after a 5 second pause, when the first attempt came back
        with no 'away' stats. The result of that retry is not used here.
        """
        #an empty year means every season listed on the site
        seasons = self.get_seasons(team) if year == "" else [year]

        for season in seasons:
            url = "http://espn.go.com/" + str(
                self.league) + "/team/schedule/_/name/" + str(
                    team[0]) + "/year/" + str(season) + "/" + str(team[1])
            print("Scraping: " + str(url))

            data = self.scrape_game_scores(url)

            for y in range(len(data['dates'])):

                #goes to boxscore page since it has the player's data for the game
                game_players_url = data['game_urls'][y].replace(
                    "recap", "boxscore")

                #scrapes for players stats
                player_stats = self.scrape_player_data(season, team, url,
                                                       game_players_url)
                #an empty 'away' list is treated as a failed scrape and retried once
                if len(player_stats['away']) == 0:
                    time.sleep(5)
                    self.scrape_player_data(season, team, url,
                                            game_players_url)

    #gets teams playing today
    def get_schedule(self):
        """Return the games listed in the first table of ESPN's schedule page.

        Each game is a two-item list holding, in page order, the
        load_league_teams() entries whose abbreviation matches the page's
        <abbr> tags. A tag with no matching team contributes an empty list,
        and a trailing unpaired team is dropped.
        """
        page = self.scrape_webpage("http://espn.go.com/" + str(self.league) +
                                   "/schedule")

        #only the contents of the first <tbody> are looked at
        open_tag = "<tbody>"
        body_start = page.find(open_tag) + len(open_tag)
        body_length = page[body_start:].find("</tbody>")
        table_body = page[body_start:body_start + body_length]

        #everything before the first <abbr is not a team
        abbr_chunks = table_body.split("<abbr")[1:]

        teams = self.load_league_teams()

        games = []
        matchup = []
        for position, chunk in enumerate(abbr_chunks):
            #the abbreviation sits between the end of the opening tag and </abbr>
            name_start = chunk.index('">') + 2
            name_abbr = chunk[name_start:chunk.index("</abbr>")].lower()

            #the last team with this abbreviation wins
            full_team_name = []
            for team in teams:
                if team[0] == name_abbr:
                    full_team_name = team

            matchup.append(full_team_name)

            #every second team completes a game
            if position % 2 != 0:
                games.append(matchup)
                matchup = []

        return games

    #gets years for listed seasons on ESPN's website
    def get_seasons(self, team):
        """Return the season years offered on a team's ESPN schedule page.

        team: sequence whose first item is the abbreviation used in the URL.

        The years are read from the option URLs between "Year:" and the next
        "</form>", sorted ascending, and followed by one extra year (latest
        listed year + 1) standing for the current season. Raises ValueError
        when the page lacks the expected markers and IndexError when no past
        season is listed.
        """
        page = self.scrape_webpage("http://espn.go.com/" + str(self.league) +
                                   "/team/schedule/_/name/" + str(team[0]))

        selector_start = page.index("Year:")
        selector_length = page[selector_start:].index("</form>")
        selector = page[selector_start:selector_start + selector_length]

        #first piece is excess form data, second is the current season's url
        #(it carries no year and is added back at the end)
        season_options = selector.split('<option value="')[2:]

        #retrieves season's year from URL in select HTML element
        seasons = []
        for option in season_options:
            season_url = option.split('"')[0]
            year_start = season_url.index("year/") + 5
            seasons.append(int(season_url[year_start:year_start + 4]))

        #Sorts smallest to largest then increments latest year to get current year
        #since ESPN's website doesn't include current season's
        seasons.sort()
        seasons.append(seasons[-1] + 1)

        return seasons

    #gets scores and game urls
    #if new data, retrieve period scores and add all to global lists
    def scrape_game_scores(self, url):
        """Scrape a team schedule page for each game's date, venue, link and score.

        url: schedule page address. It has to contain "/year/<season>/",
            because the season is read back out of it to complete the dates.

        Returns a dict of four lists that are appended to together:
        'dates' ("<month number>-<day>-<year from url>"), 'home_away'
        ("home" or "away"), 'game_urls' and 'game_scores' ("<n>-<n>" strings,
        swapped for games marked as a loss). Failures are logged through
        to_print() instead of being raised, so the lists can be empty or
        incomplete.
        """
        data = {}
        data['dates'] = []
        data['home_away'] = []
        data['game_urls'] = []
        data['game_scores'] = []
        try:

            self.to_print("Scraping game scores from " + str(url))
            content = self.scrape_webpage(url)

            #only the part of the page from the regular season heading onwards is parsed
            start = content.index("Regular Season Schedule")

            #espn has preseason stats for NHL teams, and that messes up the html
            if self.league == "nhl" and "preseason schedule" in content[
                    start:].lower():
                end = content.index("Preseason Schedule")
            else:
                end = content.index("<!-- begin sponsored links -->")

            new_data = content[start:end]

            #separates each game
            temp = new_data.split('<li class="team-name">')

            #gets scores and game urls for better scores
            #old_date carries the date parsed in the previous iteration: that is the date
            #stored with the score found in the current segment
            old_date = ""
            for x in range(0, len(temp)):
                try:
                    #if lost game
                    lost = "game-status loss" in temp[x]

                    #determines whether game was at home or away
                    #the "@" marker is looked for in the previous segment; for x == 0 that index
                    #is -1, i.e. the last segment
                    if "@" in temp[x - 1]:
                        home_away = "away"
                    else:
                        home_away = "home"

                    try:
                        #gets game date
                        temp_split = temp[x].split('<tr class="evenrow')
                        if len(temp_split) == 1:
                            temp_split = temp[x].split('<tr class="oddrow')

                        #turns ...][ team-90-24"><td>Wed, Oct 14</td><td><ul... into [ team-90-24"><td>Wed, Oct 14]
                        temp_split = temp_split[1][:temp_split[1].index("</td>"
                                                                        )]
                        #turns [ team-90-24"><td>Wed, Oct 14] into "Wed, Oct 14"
                        string_date = temp_split.split("<td>")[1]
                        #turns "Wed, Oct 14" into "Oct 14"
                        string_date = string_date.split(", ")[1]
                        #turns "Oct 14" into ["Oct", "14"]
                        split_date = string_date.split(" ")
                        months = {
                            "Jan": 1,
                            "Feb": 2,
                            "Mar": 3,
                            "Apr": 4,
                            "May": 5,
                            "Jun": 6,
                            "Jul": 7,
                            "Aug": 8,
                            "Sep": 9,
                            "Oct": 10,
                            "Nov": 11,
                            "Dec": 12
                        }
                        #gets year from url
                        #the same url year is used for every month of the schedule
                        split_url = url.split("/year/")
                        year = int(split_url[1][:split_url[1].index("/")])
                        date = str(months[split_date[0]]
                                   ) + "-" + split_date[1] + "-" + str(year)
                        if x == 0:
                            old_date = date
                    except Exception as error:
                        #a segment without a parsable date row is only logged; `date` then keeps
                        #the value from an earlier iteration (or stays unassigned if none succeeded)
                        exc_type, exc_obj, exc_tb = sys.exc_info()
                        fname = os.path.split(
                            exc_tb.tb_frame.f_code.co_filename)[1]
                        to_print = exc_type, fname, exc_tb.tb_lineno
                        self.to_print("scrape_game_scores(), scraping date: " +
                                      str(to_print))

                    # self.to_print("Date: "+str(date))
                    # self.to_print("Old date: "+str(date))

                    string_to_find = '<li class="score"><a href="'
                    # self.to_print("String to find: "+str(string_to_find))

                    # print()
                    #a segment without a score link raises here and is skipped by the except below
                    game_link = temp[x][temp[x].index(string_to_find) +
                                        len(string_to_find):]

                    #the score is the text of the link
                    score = game_link[game_link.index('">') +
                                      2:game_link.index("</a>")]

                    # self.to_print("Score: "+str(score))

                    #if lost game, switch score order since site always lists highest score first
                    if lost:
                        temp2 = score.split("-")
                        score = temp2[1] + "-" + temp2[0]

                    #removes extra innings string "F/12" from scores
                    temp2 = score.split("-")
                    temp2[0] = temp2[0].split(" ")
                    temp2[0] = temp2[0][0]
                    temp2[1] = temp2[1].split(" ")
                    temp2[1] = temp2[1][0]
                    score = temp2[0] + "-" + temp2[1]

                    #keeps only the href value and prefixes it with the scheme
                    game_link = game_link[:game_link.index('"')]
                    # game_link="http://espn.go.com"+game_link
                    game_link = "http:" + game_link

                    data['dates'].append(old_date)
                    data['game_urls'].append(game_link)
                    data['game_scores'].append(score)
                    data['home_away'].append(home_away)
                    #runs after the appends, so a failure here does not undo them
                    old_date = date

                except Exception as error:
                    #logs the exception type, file name and line number, then moves on to the next segment
                    exc_type, exc_obj, exc_tb = sys.exc_info()
                    fname = os.path.split(
                        exc_tb.tb_frame.f_code.co_filename)[1]
                    to_print = exc_type, fname, exc_tb.tb_lineno
                    self.to_print("scrape_game_scores(): " + str(to_print))
            return data
        except Exception as error:
            #page could not be fetched or lacked the expected markers: log and return what was collected
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            to_print = exc_type, fname, exc_tb.tb_lineno
            self.to_print("scrape_game_scores(): " + str(to_print))

            return data

    #gets a game's scores for each period
    def scrape_period_data(self, team, team_url, game_url):
        """Download one game page and extract each side's per-period scores.

        The page is fetched with ``self.scrape_webpage(game_url)`` and parsed
        with plain string searches; the markers differ per league, so there is
        one branch each for ``"nhl"``, ``"nba"`` and ``"mlb"``.  Any other
        value of ``self.league`` matches no branch and the method returns
        ``None`` implicitly.

        Args:
            team: Indexable team description.  ``team[0]`` is compared with the
                abbreviation found in the page to decide which side is the
                opponent; ``team[1]`` is only printed.
            team_url: URL of the team.  In the NHL and MLB branches its last
                five characters are searched for in the second score row to
                decide whether the two rows must be swapped.  Unused for NBA.
            game_url: URL of the game page to download.

        Returns:
            A dict with two keys:

            * ``'other_team'`` - the opponent's abbreviation as found in the
              page, ``-1`` when the page could not be downloaded (or, for NBA,
              when the expected markers are missing), or ``""`` when the
              NHL/MLB linescore produced no usable rows.
            * ``'scores'`` - two lists; index 0 is meant to be ``team``.
              Parsed entries are the strings taken from the HTML; short rows
              are padded with the int ``0`` up to 3 (NHL), 4 (NBA) or 9 (MLB)
              entries.  Failure placeholders are all ``-1`` (NHL, MLB) or all
              ``0`` (NBA).

        Raises:
            ValueError: From ``str.index`` when an expected marker is missing
                (only the first marker lookup of the NBA branch is guarded).
            IndexError: When the page yields fewer rows/fields than the
                parsing steps expect.
        """

        #espn uses old HTML code for nhl games
        if self.league == "nhl":
            print()
            data = self.scrape_webpage(game_url)

            print("Game url: " + str(game_url))
            # if data=="" or game_url=="http://espn.go.com":
            # scrape_webpage() returns "" when the download failed
            if data == "" or game_url == "http:":
                return {
                    'other_team': -1,
                    'scores': [[-1, -1, -1], [-1, -1, -1]]
                }

            #gets teams playing in this game
            start = data.index("gameInfo:")
            end = data[start:].index(",") + start

            #now should have gameInfo:"nhl:game:gameid=400884409-ana+at+dal",
            # print(data[start:end])

            split = data[start:end].split("-")
            temp = split[1]
            #temp should now be ana+at+dal",
            split = temp.split("+")
            team1 = split[0].replace('"', "")
            team2 = split[2].replace('"', "")
            #team1 and team2 are the 3 letter abbreviations of teams EX: ana for anaheim-ducks
            print("Team1: " + str(team1) + " | Team2: " + str(team2))
            print("Cur team: " + str(team[0]) + " | " + str(team[1]))
            if team1 == team[0]:
                other_team = team2
            else:
                other_team = team1

            # input()

            # print(data)

            #isolates the linescore table
            start = data.index(
                '<table cellspacing="0" id="gp-linescore" class="linescore"  >'
            )
            end = start + data[start:].index("</table>")

            new_data = data[start:end].replace("\n", "").replace("\t", "")

            #separates each team's row; a team whose name is not a link
            #(an old/renamed team) leaves only 2 pieces instead of 3
            rows = new_data.split('<a href="')

            if len(rows) == 2:
                #if first team listed is one with an old name
                if '<td class="team">' not in rows[1]:
                    temp = rows[0].split('<td class="team">')
                    temp.pop(0)
                    temp.pop(0)
                    temp.pop()

                    rows[0] = temp[-1]
                #if second team listed is one with old name
                else:
                    rows.pop(0)
                    rows = rows[0].split('<td class="team">')
            else:
                #removes the table header piece
                rows.pop(0)

            for x in range(0, len(rows)):
                print(str(x) + " | " + str(rows[x]))

            scores = []
            for row in rows:
                #separates each period; the first piece is the team cell
                quarters = row.split('text-align:center" >')

                temp = []
                for quarter in quarters:
                    score = quarter[:quarter.index("</td>")].strip()
                    temp.append(score)
                scores.append(temp)

            #if team is listed 2nd, make it listed 1st for consistency
            #can't do last 2 characters because they could be in url even though not correct team. 5 guarantees a / in test url for best comparison
            print("URL: " + str(team_url))
            print("Scores: " + str(scores))
            if len(scores) != 0:
                # NOTE(review): scores[1] is read without checking that a
                # second row was parsed - confirm a single row cannot occur
                if team_url[-5:] in scores[1][0]:
                    temp = scores[1]
                    scores[1] = scores[0]
                    scores[0] = temp

                #drops the leading team cell so only period scores remain
                scores[0].pop(0)
                scores[1].pop(0)

                #pads to at least 3 periods
                while len(scores[0]) < 3:
                    scores[0].append(0)
                while len(scores[1]) < 3:
                    scores[1].append(0)

                to_return = {}
                to_return['other_team'] = other_team
                to_return['scores'] = scores
                # return scores
                return to_return
            else:
                return {
                    'other_team': "",
                    'scores': [[-1, -1, -1], [-1, -1, -1]]
                }

        elif self.league == "nba":
            data = self.scrape_webpage(game_url)

            print("Game url: " + str(game_url))
            # scrape_webpage() returns "" when the download failed
            if data == "":
                return {
                    'other_team': -1,
                    'scores': [[0, 0, 0, 0], [0, 0, 0, 0]]
                }

            #isolates the page header that holds both team abbreviations
            try:
                start = data.index('<div id="custom-nav"')
                end = data[start:].index(
                    '<div id="gamepackage-links-wrap">') + start
            except Exception as error:
                print("scrape_period_data.py" + str(error))
                return {
                    'other_team': -1,
                    'scores': [[0, 0, 0, 0], [0, 0, 0, 0]]
                }

            # split_data[start:end].split('class="abbrev"')

            split = data[start:end].split('<span class="abbrev"')
            split.pop(0)

            #keeps only the text of each abbrev span
            #returns [lal, lal, okc, okc]
            for x in range(0, len(split)):
                split[x] = split[x][split[x].index(">") +
                                    1:split[x].index("<")]

            # split.pop(0)
            # split.pop(1)

            # NOTE(review): the example above lists each abbreviation twice,
            # in which case split[0] and split[1] would be the same team -
            # confirm against a real page
            team1 = split[0].lower()
            team2 = split[1].lower()
            print("Team1: " + str(team1) + " | Team2: " + str(team2))
            print("Cur team: " + str(team[0]) + " | " + str(team[1]))

            # start=data.index('Final</span>')
            #isolates everything from the header up to the end of the first table
            start = data.index('<div id="custom-nav"')
            end = start + data[start:].index("</table>")

            new_data = data[start:end].replace("\n", "").replace("\t", "")

            #separates each team's row
            rows = new_data.split('final-score">')

            rows.pop(0)
            rows.pop()
            for x in range(0, len(rows)):
                rows[x] = rows[x].split('team-name">')[-1]

            # temp=rows[0].replace("<", "").replace(">","").split("td")
            #reduces every row to the text of its <td> cells, dropping the
            #first and last piece of each row
            for x in range(0, len(rows)):
                rows[x] = rows[x].split("<td")
                rows[x].pop(0)
                rows[x].pop()
                for y in range(0, len(rows[x])):
                    rows[x][y] = rows[x][y].replace("</td>", "")
                    rows[x][y] = rows[x][y].replace(">", "")

            scores = rows
            #puts the requested team's row first
            if team1 == team[0]:
                other_team = team2
            else:
                temp = scores[1]
                scores[1] = scores[0]
                scores[0] = temp
                other_team = team1

            #pads to at least 4 periods
            while len(scores[0]) < 4:
                scores[0].append(0)
            while len(scores[1]) < 4:
                scores[1].append(0)

            to_return = {}
            to_return['other_team'] = other_team
            to_return['scores'] = scores
            # return scores
            return to_return

        # Baseball MLB
        elif self.league == "mlb":
            print()
            data = self.scrape_webpage(game_url)

            self.to_print("Game url: " + str(game_url))
            # scrape_webpage() returns "" when the download failed
            if data == "" or game_url == "http://espn.go.com" or game_url == "http://espn.go.com#":
                return {
                    'other_team':
                    -1,
                    'scores': [[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                               [-1, -1, -1, -1, -1, -1, -1, -1, -1]]
                }

            #gets teams playing in this game
            start = data.index("var omniPageName =")
            end = data[start:].index(";") + start

            #gets other team playing; the two pieces before the first team
            #abbreviation are discarded
            split = data[start:end].split("+")
            split.pop(0)
            split.pop(0)
            team1 = split[0].replace('"', "")
            team2 = split[2].replace('"', "")
            self.to_print("Team1: " + str(team1) + " | Team2: " + str(team2))
            self.to_print("Cur team: " + str(team[0]) + " | " + str(team[1]))
            if team1 == team[0]:
                other_team = team2
            else:
                other_team = team1
            # else:
            # return {'other_team': "", 'scores': [[-1,-1,-1-1,-1,-1,-1,-1,-1], [-1,-1,-1,-1,-1,-1,-1,-1,-1]]}

            self.to_print("Other team: " + str(other_team))

            #isolates period data html
            start = data.index('class="linescore"')
            end = start + data[start:].index("</table>")
            new_data = data[start:end].replace("\n", "").replace("\t", "")

            #separates each team; a team whose name is not a link
            #(an old/renamed team) leaves only 2 pieces instead of 3
            rows = new_data.split('<a href="')
            if len(rows) == 2:
                #if first team listed is one with an old name
                if '<td class="team" style="width: 3em !important">' not in rows[
                        1]:
                    temp = rows[0].split(
                        '<td class="team" style="width: 3em !important">')
                    temp.pop(0)
                    temp.pop(0)
                    temp.pop()

                    rows[0] = temp[-1]
                #if second team listed is one with old name
                else:
                    rows.pop(0)
                    rows = rows[0].split(
                        '<td class="team" style="width: 3em !important">')
            #removes column headers
            else:
                rows.pop(0)

            print()
            for x in range(0, len(rows)):
                print(str(x) + " | " + str(rows[x]))

            # input()

            scores = []
            for row in rows:
                #separates each inning; the first piece is the team cell
                quarters = row.split('text-align:center">')

                temp = []
                for quarter in quarters:
                    score = quarter[:quarter.index("</td>")].strip()
                    temp.append(score)
                scores.append(temp)

            #if team is listed 2nd, make it listed 1st for consistency
            #can't do last 2 characters because they could be in url even though not correct team. 5 guarantees a / in test url for best comparison
            self.to_print("URL: " + str(team_url))
            self.to_print("Scores: " + str(scores))
            # input()
            if len(scores) != 0 and len(scores[0]) > 1:
                #sorts scores to match teams
                if team_url[-5:] in scores[1][0]:
                    temp = scores[1]
                    scores[1] = scores[0]
                    scores[0] = temp

                #remove urls from scores list
                scores[0].pop(0)
                scores[1].pop(0)

                #pads to at least 9 innings
                while len(scores[0]) < 9:
                    scores[0].append(0)
                while len(scores[1]) < 9:
                    scores[1].append(0)

                #9th inning is "-" if team didn't have to go to bottom of the 9th
                if scores[0][-1] == "-":
                    scores[0][-1] = 0
                if scores[1][-1] == "-":
                    scores[1][-1] = 0

                to_return = {}
                to_return['other_team'] = other_team
                to_return['scores'] = scores
                return to_return
            else:
                return {
                    'other_team':
                    "",
                    'scores': [[-1, -1, -1, -1, -1, -1, -1, -1, -1],
                               [-1, -1, -1, -1, -1, -1, -1, -1, -1]]
                }

    def scrape_player_data(self, season, team, team_url, game_url):
        """Scrape an NBA box score and store every player's stat line.

        The box score at ``game_url`` is downloaded, split into its away and
        home halves and handed to ``self.scrape_player_data2``.  For every
        player a SQLite file ``./<league>/player_data/<player_id>.sqlite`` is
        opened, its ``Games`` table is created when missing, and one row is
        inserted for this game.  Players without a full stat line (they did
        not play) and games that are already stored are skipped.

        Args:
            season: Season value written to the ``season`` column.
            team: Unused; kept for interface compatibility.
            team_url: Unused; kept for interface compatibility.
            game_url: Box score URL; the numeric game id is what remains after
                removing the known ESPN box score prefixes.

        Returns:
            ``{'away': ..., 'home': ...}`` holding the two results of
            ``scrape_player_data2`` (every stat dict additionally gets a
            ``'home_away'`` key), or ``{'away': [], 'home': []}`` when
            ``game_url`` is the bare site URL or the download failed.

        Raises:
            ValueError: When no integer game id can be parsed from
                ``game_url`` or the box score markers are missing.
        """
        # The bare site URL carries no game id.  This must be checked before
        # int() below, which would otherwise raise and make the guard
        # unreachable.
        if game_url == "http://espn.go.com":
            print("Game url: " + str(game_url))
            return {'away': [], 'home': []}

        game_id = int(
            game_url.replace("http://espn.go.com/nba/boxscore?gameId=",
                             "").replace("http://espn.go.com/nba/boxscore?id=",
                                         ""))

        data = self.scrape_webpage(game_url)

        print("Game url: " + str(game_url))
        # scrape_webpage() returns "" when the download failed
        if data == "":
            return {'away': [], 'home': []}

        #isolates the box score html
        start = data.index('<article class="boxscore-tabs')
        end = start + data[start:].index("</article>")
        box_score = data[start:end]

        #the away half runs up to the home marker, the home half to the end
        away_start = box_score.find('gamepackage-away-wrap">')
        home_start = box_score.find('gamepackage-home-wrap">')
        away_team = box_score[away_start:home_start]
        home_team = box_score[home_start:]

        away_player_stats = self.scrape_player_data2(away_team)
        home_player_stats = self.scrape_player_data2(home_team)

        #consolidates player ids into a NEW list so that the away result
        #handed back to the caller does not end up containing the home ids
        player_ids = (list(away_player_stats['player_ids']) +
                      list(home_player_stats['player_ids']))
        for player_id in home_player_stats['player_ids']:
            print(player_id)

        #tags every stat line with its side (the dicts are shared with the
        #returned results on purpose)
        player_stats = []
        for side, parsed in (("away", away_player_stats),
                             ("home", home_player_stats)):
            for stats in parsed['player_stats']:
                stats['home_away'] = side
                player_stats.append(stats)

        #stat dict keys in the order of the INSERT columns after game_id/season
        stat_keys = ('home_away', 'points', 'minutes_played', 'field_goals',
                     'three_pointers', 'free_throws', 'rebounds',
                     'offensive_rebounds', 'defensive_rebounds', 'assists',
                     'steals', 'blocks', 'turnovers', 'personal_fouls')

        for player_id, stats in zip(player_ids, player_stats):
            #players who did not play only carry a placeholder stat line
            try:
                row = (game_id, season) + tuple(stats[key]
                                                for key in stat_keys)
            except KeyError:
                row = None

            #saves player data to sqlite file
            db = sqlite3.connect("./" + str(self.league) + "/player_data/" +
                                 str(player_id) + ".sqlite")
            try:
                cursor = db.cursor()
                cursor.execute('''CREATE TABLE IF NOT EXISTS "Games" (
                    game_id TEXT PRIMARY KEY,
                    season INTEGER,
                    home_away TEXT,
                    pts INTEGER,
                    min INTEGER,
                    fg TEXT,
                    pt3 TEXT,
                    ft TEXT,
                    rb INTEGER,
                    oreb INTEGER,
                    dreb INTEGER,
                    ast INTEGER,
                    stl INTEGER,
                    blk INTEGER,
                    turn INTEGER,
                    pf INTEGER
                    )''')
                if row is not None:
                    try:
                        cursor.execute(
                            '''INSERT INTO "Games" (
                            game_id, season, home_away, pts, min, fg, pt3, ft,
                            rb, oreb, dreb, ast, stl, blk, turn, pf
                            ) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''', row)
                    except sqlite3.IntegrityError:
                        #this game is already stored for the player
                        pass
                db.commit()
            except sqlite3.Error as error:
                #stays best-effort, but no longer hides database problems
                print("scrape_player_data(): " + str(error))
            finally:
                db.close()

        to_return = {}
        to_return['away'] = away_player_stats
        to_return['home'] = home_player_stats
        return to_return

    def scrape_player_data2(self, data):
        """Parse one team's box score html into player ids and stat lines.

        ``data`` is split on ``<tbody>``; the first section after the split
        prefix is treated as the starters table and the second as the bench
        table.  Each table is cut into rows, the leading piece and the
        trailing non-player piece(s) are dropped, and the rows are parsed by
        ``self.scrape_player_data3``.

        Returns:
            ``{'player_ids': [...], 'player_stats': [...]}`` with the starters
            first, followed by the bench.

        Raises:
            IndexError: When fewer tables or rows are present than expected.
        """
        sections = data.split('<tbody>')
        #everything before the first <tbody> is not part of a table
        del sections[0]
        starters_html, bench_html = sections[0], sections[1]

        #starters: drop the piece before the first row and the last piece
        starter_rows = starters_html.split("<tr>")
        del starter_rows[0]
        del starter_rows[-1]

        #bench: rows may carry attributes, and the last two pieces are dropped
        bench_rows = bench_html.split("<tr")
        del bench_rows[0]
        del bench_rows[-1]
        del bench_rows[-1]

        parsed_groups = [
            self.scrape_player_data3(starter_rows),
            self.scrape_player_data3(bench_rows),
        ]

        return {
            'player_ids': [
                player_id for group in parsed_groups
                for player_id in group['player_ids']
            ],
            'player_stats': [
                stat_line for group in parsed_groups
                for stat_line in group['player_stats']
            ],
        }

    #scrapes player data from rows of players
    def scrape_player_data3(self, rows):
        """Extract the player id and stat line from each box score row.

        The player id is whatever remains of the first ``href`` in the row
        after removing the ESPN player URL prefix, converted to ``int``.  The
        stat cells are the ``<td`` pieces after the name cell.  When any cell
        is missing or a numeric cell cannot be converted (the player did not
        play), the stat line is the placeholder ``{'minutes_played': '0'}``.

        Returns:
            ``{'player_ids': [...], 'player_stats': [...]}`` with one entry per
            row in each list.

        Raises:
            ValueError: When a row's link does not reduce to an integer id.
        """
        #(stat name, position among the stat cells, convert to int?)
        #position 12 is not read
        layout = (
            ('minutes_played', 0, True),
            ('field_goals', 1, False),
            ('three_pointers', 2, False),
            ('free_throws', 3, False),
            ('offensive_rebounds', 4, True),
            ('defensive_rebounds', 5, True),
            ('rebounds', 6, True),
            ('assists', 7, True),
            ('steals', 8, True),
            ('blocks', 9, True),
            ('turnovers', 10, True),
            ('personal_fouls', 11, True),
            ('points', 13, True),
        )
        href_marker = 'href="'

        def cell_text(cell):
            #text between the end of the opening tag and the next tag
            return cell[cell.index(">") + 1:cell.index("<")]

        collected_ids = []
        collected_stats = []
        for row in rows:
            link_start = row.find(href_marker) + len(href_marker)
            link_end = row[link_start:].index('"') + link_start
            link = row[link_start:link_end]
            collected_ids.append(
                int(link.replace("http://espn.go.com/nba/player/_/id/", "")))

            #fails if player didn't play
            try:
                #skips the piece before the first cell and the name cell
                cells = row.split("<td")[2:]

                stat_line = {}
                for stat_name, position, as_number in layout:
                    text = cell_text(cells[position])
                    stat_line[stat_name] = int(text) if as_number else text
                collected_stats.append(stat_line)
            except Exception:
                collected_stats.append({'minutes_played': '0'})

        return {'player_ids': collected_ids, 'player_stats': collected_stats}

    def scrape_webpage(self, url):
        """Download ``url`` and return its text with HTML entities decoded.

        A random entry of ``self.user_agents`` is installed as the User-agent
        header of ``self.opener`` before every request.  The body is decoded
        as UTF-8 with undecodable bytes dropped.

        Returns:
            The decoded page text, or ``""`` when anything goes wrong (the
            exception is printed).  Callers test for the empty string.
        """
        # Imported here so the block does not depend on how the file imports
        # the html package.
        import html

        try:
            #picks a fresh user agent for every request
            self.opener.addheaders = [('User-agent',
                                       random.choice(self.user_agents))]

            #the context manager closes the connection even if read() fails
            with self.opener.open(url, timeout=30) as response:
                raw = response.read()

            # html.unescape replaces HTMLParser().unescape, which was removed
            # in Python 3.9; with the old call every download ended in the
            # except branch below and returned "".
            return html.unescape(raw.decode('UTF-8', errors='ignore'))
        except Exception as exception:
            #deliberately broad: any failure is reported as an empty page
            print(exception)
            return ""

    #loads list of league teams
    def load_league_teams(self):
        """Read ``./<league>/<league>_teams.txt`` into a list of teams.

        Every line of the file is split on ``|`` and each field is stripped of
        surrounding whitespace.

        Returns:
            A list with one list of fields per line of the file.

        Raises:
            OSError: When the teams file cannot be opened.
        """
        path = './' + str(self.league) + '/' + str(self.league) + '_teams.txt'

        #the with block closes the file, which the original never did
        with open(path) as teams_file:
            return [[field.strip() for field in line.split("|")]
                    for line in teams_file]

    def to_print(self, to_print):
        """Print a message prefixed with the current time and remember it.

        The timestamp comes from ``self.universal.get_current_time()``; the
        formatted line is printed and appended to ``self.output``.
        """
        stamp = self.universal.get_current_time()
        line = "[{!s}] {!s}".format(stamp, to_print)

        print(line)
        self.output.append(line)

    def save_output(self):
        """Hand the collected log lines to ``self.universal.save_to_txt``.

        The lines in ``self.output`` are passed together with
        ``self.output_path`` as the destination.
        """
        write_lines = self.universal.save_to_txt
        write_lines(self.output_path, self.output)

    def initialize_user_agents(self):
        """Register the known user agents and build the URL opener.

        The agents are added to ``self.user_agents`` only when not already
        present, so calling this more than once (or on several objects that
        share one list - NOTE(review): the list looks like it may be defined
        at class level, confirm) no longer piles up duplicates.

        ``self.opener`` is then rebuilt with redirect and plain HTTP handlers
        and given a randomly chosen User-agent header.
        """
        known_agents = (
            "Mozilla/5.0 (X10; Ubuntu; Linux x86_64; rv:25.0)",
            "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:12.0)",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/540 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/540",
            "Mozilla/5.0 (Windows; U; Windows NT 5.2; it; rv:1.8.1.11) Gecko/20071327 Firefox/2.0.0.10",
            "Opera/9.3 (Windows NT 5.1; U; en)",
        )
        for agent in known_agents:
            if agent not in self.user_agents:
                self.user_agents.append(agent)

        #initializes url variables
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPRedirectHandler(),
            urllib.request.HTTPHandler(debuglevel=0))
        self.opener.addheaders = [('User-agent',
                                   random.choice(self.user_agents))]
Esempio n. 3
0
class Odds_Calculator:
    """Analyze teams and turn algorithm scores into winning odds.

    Instances are configured for one league in ``__init__`` and combine the
    scraped/loaded team data with an ``Algo`` result to build a textual report
    (see ``team_comparison``) or a single-team analysis (see ``analyze``).
    """

    # Class-level defaults.  ``universal`` is replaced in __init__ and
    # ``algo`` in team_comparison(); ``opener`` and ``scraper`` are not
    # assigned in the code visible here (__init__ stores the scraper as
    # ``espn_scraper``) - NOTE(review): possibly unused, confirm.
    opener = None
    scraper = None
    algo = None
    universal = None

    # Mutable list defined on the class, so it is shared by all instances
    # unless an instance rebinds it.
    user_agents = []

    #can be nba, nhl, nfl, mlb
    league = "nba"

    # Number of periods per league, keyed by lowercase league name.
    num_periods = {'nba': 4, 'nhl': 3, 'nfl': 4, 'mlb': 9}

    def __init__(self, league):
        """Configure the calculator for ``league`` (stored lowercased).

        Builds the ``Universal_Functions`` helper and the ``ESPN_Scraper`` for
        the same league.
        """
        league_key = league.lower()

        self.league = league_key
        self.universal = Universal_Functions(league_key)
        self.espn_scraper = ESPN_Scraper(league_key)

    #analyzes a single team
    def single_team_analysis(self, team):
        """Interactively analyze one team for a season typed in by the user.

        Prompts for the season year, refreshes the team's data through the
        scraper, loads it and passes it on to ``self.analyze``.
        """
        season_year = input("Current season year: ")

        self.espn_scraper.update_data(team, season_year)
        team_data = self.universal.load_data(team, "", season_year)

        self.analyze(team, team_data, season_year)

    #analyzes 2 teams and compares to determine which has best chance of winning
    def team_comparison(self, algo_version, team1, team2, date, cur_year):
        """Compare an away and a home team and describe who is favoured.

        Both teams' data is refreshed and loaded, analyzed with
        ``self.analyze2`` and scored with a new ``Algo`` instance (stored in
        ``self.algo``).  The result is formatted as a list of report lines.

        Args:
            algo_version: ``"Algo_V1"`` (point based, odds from
                ``self.get_odds``) or ``"Algo_V2"`` (percentage based, odds
                are ``abs(total)``).
            team1: Away team; ``team1[1]`` is used as its display name.
            team2: Home team; ``team2[1]`` is used as its display name.
            date: Date passed to the data loader and the algorithm.
            cur_year: Season year passed to the scraper and the data loader.

        Returns:
            The report as a list of strings.

        Raises:
            ValueError: When ``algo_version`` is not one of the two supported
                values.  This is checked before any scraping happens; the
                original only failed afterwards with an UnboundLocalError.
            ZeroDivisionError: When the winning percentage is exactly 100,
                because the odds are derived from ``100 - winning_odds``.
        """
        if algo_version not in ("Algo_V1", "Algo_V2"):
            raise ValueError("Unsupported algo_version: " + repr(algo_version))
        is_v1 = algo_version == "Algo_V1"

        self.algo = Algo(self.league)

        self.espn_scraper.update_data(team1, cur_year)
        self.espn_scraper.update_data(team2, cur_year)

        data1 = self.universal.load_data(team1, date, cur_year)
        data2 = self.universal.load_data(team2, date, cur_year)

        returned1 = self.analyze2(team1, team2, data1, "away")
        returned2 = self.analyze2(team2, team1, data2, "home")

        if is_v1:
            algo_data = self.algo.calculate(date, returned1, returned2)
        else:
            algo_data = self.algo.calculate_V2(date, returned1, returned2)

        record_points = algo_data['record_points']
        home_away_points = algo_data['home_away_points']
        home_away_10_games_points = algo_data['home_away_10_games_points']
        last_10_games_points = algo_data['last_10_games_points']
        avg_points = algo_data['avg_points']
        avg_points_10_games = algo_data['avg_points_10_games']
        total = algo_data['total']

        to_output = [
            "",
            "Date: " + str(date),
            "Away: " + str(team1[1]) + " | Home: " + str(team2[1]),
        ]

        if is_v1:
            win_streak = algo_data['win_streak']
            win_streak_home_away = algo_data['win_streak_home_away']

            #each row: label, rescaled value, separator, raw points.
            #only the first two and the average rows differ between leagues
            if self.league == "nba":
                rows = [
                    ("Seasonal Record:      ", record_points * 10, "/10 = ",
                     record_points),
                    ("Home Away:            ", home_away_points * 10,
                     "/10 = ", home_away_points),
                    ("Home away 10:         ", home_away_10_games_points * 5,
                     "/5 = ", home_away_10_games_points),
                    ("Last 10 games:        ", last_10_games_points * 5,
                     "/5 = ", last_10_games_points),
                    ("Avg points:           ", avg_points * 8, "/8 = ",
                     avg_points),
                    ("Avg points 10:        ", avg_points_10_games * 8,
                     "/8 = ", avg_points_10_games),
                ]
            else:
                rows = [
                    ("Seasonal Record:      ", record_points * 5, "/5 = ",
                     record_points),
                    ("Home Away:            ", home_away_points * 5, "/5 = ",
                     home_away_points),
                    ("Home away 10:         ", home_away_10_games_points * 5,
                     "/5 = ", home_away_10_games_points),
                    ("Last 10 games:        ", last_10_games_points * 5,
                     "/5 = ", last_10_games_points),
                    ("Avg points:           ", avg_points / 2, "*2 = ",
                     avg_points),
                    ("Avg points 10:        ", avg_points_10_games / 2,
                     "*2 = ", avg_points_10_games),
                ]
            rows.append(("Win streak:           ", win_streak * 3, "/3 = ",
                         win_streak))
            rows.append(("Win streak home away: ", win_streak_home_away * 3,
                         "/3 = ", win_streak_home_away))

            for label, scaled, separator, points in rows:
                to_output.append(label + str(scaled) + separator +
                                 str(points))
            to_output.append("--------")
            to_output.append("Total: " + str(total))
            to_output.append("--------")

            #chance of favorable team winning
            winning_odds = self.get_odds(total)
        else:
            percentages = [
                ("Seasonal Record:      ", record_points),
                ("Home Away:            ", home_away_points),
                ("Home away 10:         ", home_away_10_games_points),
                ("Last 10 games:        ", last_10_games_points),
                ("Avg points:           ", avg_points),
                ("Avg points 10:        ", avg_points_10_games),
            ]
            #the home/away streak is only reported for nhl
            if self.league == "nhl":
                percentages.append(("Win streak home away: ",
                                    algo_data['win_streak_home_away']))

            for label, percentage in percentages:
                to_output.append(label + str(percentage) + "%")
            to_output.append("--------")
            to_output.append("Total: " + str(total) + "%")
            to_output.append("--------")

            #chance of favorable team winning
            winning_odds = abs(total)

        to_output.append("Perc chance to win: " + str(winning_odds) + "%")

        #both sides are reported with the same magnitude: -line for the
        #favorite and +line for the underdog
        line = (100 / (100 - winning_odds) - 1) * 100
        to_output.append("Favorable team odds: -" + str(line))
        to_output.append("Underdog team odds: +" + str(line))

        return to_output

    #gets odds of winning for algo_V1
    def get_odds(self, total_points):
        """Map an Algo_V1 point total to a winning percentage.

        The magnitude of ``total_points`` is capped at 27 and rescaled to a
        0-10 ranking, which is fed into a quadratic fitted to the winning
        percentage per ranking level.  The same polynomial is used for every
        league.

        Returns:
            The winning percentage, never lower than 50.
        """
        max_points = 27

        #only the size of the lead matters, capped at max_points
        capped = abs(total_points)
        if capped > max_points:
            capped = max_points

        ranking = capped / max_points * 10

        #2D polynomial that follows the percentage chance of winning per level of ranking 1-10
        percentage = -0.23 * (ranking**2) + 7.25 * ranking + 47.9

        return 50 if percentage < 50 else percentage

    #analyzes current team
    def analyze(self, team, data, end_year):
        """Interactively analyze one team and save the report to disk.

        Creates the team's folder below
        ``./<league>/analyze/single_analysis/`` when it does not exist, asks
        whether the team plays home or away and against whom, runs the
        analysis, saves its csv files through ``self.save_analysis`` and
        writes the combined text report to ``<team>_analysis.txt`` in the
        team's folder.

        Args:
            team: Team description; ``team[1]`` names the folder and files.
            data: The team's loaded data.
            end_year: Passed to ``self.analyze_wins_ranked_teams``.
        """
        team_name = str(team[1])
        team_dir = ("./" + str(self.league) + "/analyze/single_analysis/" +
                    team_name)

        if not os.path.isdir(team_dir):
            os.mkdir(team_dir)

        home_away = input("Are they home or away: ").lower()
        other_team = input("Playing against (letter abbreviation): ")

        returned = self.analyze2(team, other_team, data, home_away)
        self.save_analysis(team, data, returned, home_away)

        returned['output'] = self.get_output_analysis("", team, returned,
                                                      home_away)

        #the ranked-teams section goes after the main report
        more_output = self.analyze_wins_ranked_teams(team, data, end_year)
        returned['output'].extend(more_output)

        report_path = team_dir + "/" + team_name + "_analysis.txt"
        self.universal.save_to_txt(report_path, returned['output'])

    #analyzes whatever team needed for self.analyze()
    def analyze2(self, team, other_team, data, home_away):
        """Collect the statistics used to rate ``team`` against ``other_team``.

        Args:
            team: Team being analyzed (only printed here).
            other_team: Opponent, either its abbreviation or a list whose
                first element is the abbreviation.
            data: The analyzed team's loaded data, passed to every helper.
            home_away: Unused; kept for interface compatibility.

        Returns:
            A dict with the keys ``'seasonal_records'``,
            ``'avg_game_points'``, ``'home_away_record'``,
            ``'current_win_ratio'``, ``'10_game_win_ratio'`` and
            ``'win_loss_streaks_against'``.
        """
        print("Analyzing " + str(team))

        #get_win_streaks_against only accepts "lal" and not
        #["lal", "los-angeles-lakers"], so unwrap the abbreviation
        if isinstance(other_team, list):
            opponent = other_team[0]
        else:
            opponent = other_team

        to_return = {}
        #seasonal win-loss ratio (computed once; the original computed it
        #twice and threw the first result away)
        to_return['seasonal_records'] = self.get_seasonal_records(data)
        #average point stats
        to_return['avg_game_points'] = self.get_avg_points(data)
        #stats in home vs away games
        to_return['home_away_record'] = self.get_home_away_record(data)
        #current win-loss ratio
        to_return['current_win_ratio'] = self.get_current_win_ratio(data)
        #last 10 games win ratio
        to_return['10_game_win_ratio'] = self.analyze_10_games_win_ratio(data)
        #winning or losing streaks against specified team
        to_return['win_loss_streaks_against'] = self.get_win_streaks_against(
            opponent, data)

        return to_return

    def save_analysis(self, team, data, returned, home_away):
        """Write every section of a team's analysis to its own CSV file.

        Each file is written with ``self.universal.save_to_csv`` to
        ``./<league>/analyze/single_analysis/<team[1]>/<team[1]>_<section>.csv``
        and its path is printed afterwards.

        team: indexable; ``team[1]`` names the folder and the file prefix.
        data: per-season entries; only ``data[i]['year']`` is read here.
        returned: analysis dict holding 'seasonal_records', 'avg_game_points',
            'home_away_record', 'current_win_ratio', '10_game_win_ratio' and
            'win_loss_streaks_against'.
        home_away: "home" or "away" adds the matching venue rows to the
            streak file; any other value adds none.
        """
        team_name = str(team[1])
        file_prefix = ("./" + str(self.league) + "/analyze/single_analysis/" +
                       team_name + "/" + team_name)

        def season_label(index):
            #seasons are labelled as January 1st of their year
            return "1-1-" + str(data[index]['year'])

        #seasonal win-loss ratio: one row per season holding wins minus losses
        season_rows = [[season_label(i), record[0] - record[1]]
                       for i, record in enumerate(returned['seasonal_records'])]
        target = file_prefix + "_seasonal_records.csv"
        self.universal.save_to_csv(target, season_rows)
        print("Saved to " + str(target))

        #average point stats: own avg, other avg, combined, then period averages
        points = returned['avg_game_points']
        point_rows = []
        for i, own_avg in enumerate(points['avg_game_points']):
            row = [season_label(i), own_avg]
            other_avg = points['avg_other_game_points'][i]
            row.append(other_avg)
            row.append(own_avg + other_avg)
            row.extend(points['avg_quarter_points'][i])
            point_rows.append(row)
        target = file_prefix + "_avg_game_points.csv"
        self.universal.save_to_csv(target, point_rows)
        print("Saved to " + str(target))

        #stats in home vs away games, the two tables separated by 3 blank rows
        venue_records = returned['home_away_record']
        venue_rows = [[season_label(i), record[0], record[1]]
                      for i, record in enumerate(venue_records['home_record'])]
        venue_rows.extend(["", "", ""] for _ in range(3))
        venue_rows.extend(
            [season_label(i), record[0], record[1]]
            for i, record in enumerate(venue_records['away_record']))
        target = file_prefix + "_home_away_record.csv"
        self.universal.save_to_csv(target, venue_rows)
        print("Saved to " + str(target))

        #per-game win-loss progression is saved exactly as it was computed
        target = file_prefix + "_current_win_ratio.csv"
        self.universal.save_to_csv(target, returned['current_win_ratio'])
        print(target)

        #last 10 games win ratio
        #turns a wins-minus-losses key such as -4 into the label "3-7"
        bucket_labels = {
            '-10': '"0-10"',
            '-8': '"1-9"',
            '-6': '"2-8"',
            '-4': '"3-7"',
            '-2': '"4-6"',
            '0': '"5-5"',
            '2': '"6-4"',
            '4': '"7-3"',
            '6': '"8-2"',
            '8': '"9-1"',
            '10': '"10-0"'
        }
        ratio_rows = [["Year", "win-loss", "num wins", "num games"]]
        for i, season_buckets in enumerate(returned['10_game_win_ratio']):
            for diff in range(-10, 11, 2):
                key = str(diff)
                wins = season_buckets[key][0]
                games = season_buckets[key][1]
                #1-based spreadsheet row this entry is about to occupy
                sheet_row = str(len(ratio_rows) + 1)
                if games != 0:
                    win_pct = "=C" + sheet_row + "/D" + sheet_row + "*100"
                else:
                    win_pct = 0
                #only the first row of a season carries the year
                first_cell = data[i]['year'] if diff == -10 else ""
                ratio_rows.append(
                    [first_cell, bucket_labels[key], wins, games, win_pct])
            ratio_rows.append(["", "", "", ""])
        target = file_prefix + "_10_game_win_ratio.csv"
        self.universal.save_to_csv(target, ratio_rows)
        print(target)

        #winning or losing streaks against specified team
        streaks = returned['win_loss_streaks_against']
        streak_rows = [
            ["Losing streak", streaks['games_since_last_win']],
            ["Winning streak", streaks['games_since_last_loss']],
        ]
        if home_away == "away" or home_away == "home":
            streak_rows.append([
                "Losing streak " + home_away,
                streaks['games_since_last_win_' + home_away]
            ])
            streak_rows.append([
                "Winning streak " + home_away,
                streaks['games_since_last_loss_' + home_away]
            ])
        target = file_prefix + "_win_loss_streaks_against.csv"
        self.universal.save_to_csv(target, streak_rows)
        print(target)

    def get_output_analysis(self, indent, team, returned, home_away):
        """Build the printable summary lines for one team's analysis.

        indent: string prefixed to every non-blank line.
        team: indexable; ``team[1]`` is used as the heading.
        returned: analysis dict holding 'seasonal_records', 'avg_game_points',
            'home_away_record', 'current_win_ratio', '10_game_win_ratio' and
            'win_loss_streaks_against'.
        home_away: "home" or "away"; picks the venue-specific figures.

        Returns the list of lines. Needs at least two entries in
        'seasonal_records' because the latest season is compared with the
        one before it.
        """
        season_totals = returned['seasonal_records']
        points = returned['avg_game_points']
        venue_stats = returned['home_away_record']
        game_log = returned['current_win_ratio']
        ten_game_buckets = returned['10_game_win_ratio']
        streaks = returned['win_loss_streaks_against']

        #### output ####
        lines = ["", "", indent + team[1]]

        #trend compares wins minus losses of the latest two seasons
        latest_diff = season_totals[-1][0] - season_totals[-1][1]
        previous_diff = season_totals[-2][0] - season_totals[-2][1]
        trend = "uptrend" if latest_diff > previous_diff else "downtrend"
        lines.append(indent + "Season: " + str(latest_diff) + " on " +
                     str(trend))

        if home_away == "away" or home_away == "home":
            season_split = venue_stats[home_away + '_record'][-1]
            lines.append(indent + "Home-Away: " + str(season_split[0]) + "-" +
                         str(season_split[1]) + " " + home_away)
            recent_split = venue_stats[home_away + '_10_games'][-1]
            lines.append(indent + "   Last 10 " + home_away + " games: " +
                         str(recent_split[0]) + "-" + str(recent_split[1]))

        #wins minus losses over the ten most recent entries of the game log
        #(index 2 of every entry is +1 for a win and -1 for a loss)
        total_games = len(game_log)
        recent_diff = 0
        for back in range(1, 11):
            recent_diff += game_log[total_games - back][2]

        bucket_labels = {
            '-10': '0-10',
            '-8': '1-9',
            '-6': '2-8',
            '-4': '3-7',
            '-2': '4-6',
            '0': '5-5',
            '2': '6-4',
            '4': '7-3',
            '6': '8-2',
            '8': '9-1',
            '10': '10-0'
        }
        bucket_key = str(recent_diff)
        lines.append(indent + "10 Games: " + bucket_labels[bucket_key])

        #how games following this 10-game form turned out in the latest season
        won = ten_game_buckets[-1][bucket_key][0]
        num_games = ten_game_buckets[-1][bucket_key][1]
        if num_games != 0:
            pct_text = str(won / num_games * 100) + "%"
        else:
            pct_text = "N/A%"
        lines.append(indent + "   " + str(won) + " won out of " +
                     str(num_games) + " games | " + pct_text)

        lines.append(indent + "Avg points: " +
                     str(points['avg_game_points'][-1]) + " - " +
                     str(points['avg_other_game_points'][-1]))
        lines.append(indent + "   Last 10 games: " +
                     str(points['avg_10_games'][-1]) + " - " +
                     str(points['avg_other_10_games'][-1]))

        #a winning streak takes priority; with neither streak nothing is added
        if streaks['games_since_last_loss'] > 0:
            streak = ("Winning streak against ", "   Winning streak ",
                      'games_since_last_loss')
        elif streaks['games_since_last_win'] > 0:
            streak = ("Losing streak against ", "   Losing streak ",
                      'games_since_last_win')
        else:
            streak = None

        if streak is not None:
            overall_text, venue_text, counter = streak
            lines.append(indent + overall_text + str(streaks['other_team']) +
                         ": " + str(streaks[counter]))
            lines.append(indent + venue_text + home_away + ": " +
                         str(streaks[counter + '_' + str(home_away)]))

        return lines

    #analyzes number of wins against teams of certain rankings. Like # wins against even teams (23-25 to 27-25) or against good teams (30-15) or bad teams (15-30)... etc
    def analyze_wins_ranked_teams(self, team, data, end_year):
        """Re-run the analysis for both sides of the latest season's last games.

        Walks backwards over ten game indexes of ``data[-1]``. For every game
        it loads both teams' data as of that game's date via
        ``self.universal.load_data``, runs ``self.analyze2`` and
        ``self.get_output_analysis`` for each side, and records who won.

        team: indexable; ``team[0]`` and ``team[1]`` are both used
            (presumably abbreviation and full name - confirm with callers).
        data: per-season entries; only the last one is read ('other_team',
            'dates', 'home_away', 'game_scores').
        end_year: forwarded unchanged to ``load_data``.

        Returns all generated report lines as one flat list.
        """
        latest = data[-1]
        game_count = len(latest['other_team'])
        margin = "   "
        report = []

        #newest game first, ten indexes in total
        for idx in range(game_count - 1, game_count - 11, -1):
            #[short name, long name]; the long name is filled in below
            opponent = [latest['other_team'][idx], ""]
            game_date = latest['dates'][idx]

            venue = latest['home_away'][idx]
            if venue == "home":
                opponent_venue = "away"
            elif venue == "away":
                opponent_venue = "home"

            #gets "los-angeles-lakers" if given "lal"
            for entry in self.universal.load_league_teams():
                if entry[0] == opponent[0]:
                    opponent[1] = entry[1]

            #analysis from the point of view of the given team
            team_history = self.universal.load_data(team, game_date, end_year)
            print(team_history[-1]['other_team'][-1])
            team_stats = self.analyze2(team, opponent[0], team_history, venue)
            report.extend(
                self.get_output_analysis(margin, team, team_stats, venue))

            #same analysis from the opponent's point of view
            opponent_history = self.universal.load_data(
                opponent, game_date, end_year)
            print(
                str(opponent_history[-1]['other_team'][-1]) + " | " +
                str(game_date) + " | " +
                str(opponent_history[-1]['dates'][-5]))
            opponent_stats = self.analyze2(opponent, team[0],
                                           opponent_history, opponent_venue)
            opponent_lines = self.get_output_analysis(
                margin, opponent, opponent_stats, opponent_venue)

            print()
            for text in opponent_lines:
                print(text)
                report.append(text)
            report.append("")

            #adds winner and scores (winner's score is listed first)
            team_score = latest['game_scores'][idx][0]
            opponent_score = latest['game_scores'][idx][1]
            if team_score > opponent_score:
                report.append(margin + "Winner: " + team[1] + " | " +
                              str(team_score) + "-" + str(opponent_score))
            else:
                report.append(margin + "Winner: " + opponent[1] + " | " +
                              str(opponent_score) + "-" + str(team_score))

            report.append(margin +
                          "----------------------------------------")
            print()

        return report

    #returns wins/loss streaks against other_team
    def get_win_streaks_against(self, other_team, original_data):
        """Count the current winning/losing run against ``other_team``.

        Seasons and games are replayed in the order given. Every game
        against ``other_team`` adds one to the matching streak counter and
        resets the opposite one, overall and again for that game's venue.
        A game counts as won only when the first score is strictly greater
        than the second, and any venue other than "away" is counted as home.

        Returns a dict with 'other_team' plus six counters named
        'games_since_last_win' / 'games_since_last_loss', each also with an
        '_away' and a '_home' variant.
        """
        streaks = {'other_team': other_team}
        for counter in ('games_since_last_win', 'games_since_last_loss',
                        'games_since_last_win_away',
                        'games_since_last_win_home',
                        'games_since_last_loss_away',
                        'games_since_last_loss_home'):
            streaks[counter] = 0

        for season in original_data:
            for idx, opponent in enumerate(season['other_team']):
                if opponent == other_team:
                    score = season['game_scores'][idx]
                    if score[0] > score[1]:
                        #a win extends "games since last loss"
                        growing = 'games_since_last_loss'
                        cleared = 'games_since_last_win'
                    else:
                        growing = 'games_since_last_win'
                        cleared = 'games_since_last_loss'

                    if season['home_away'][idx] == "away":
                        venue_suffix = "_away"
                    else:
                        venue_suffix = "_home"

                    #update the overall counters, then the venue ones
                    for suffix in ("", venue_suffix):
                        streaks[cleared + suffix] = 0
                        streaks[growing + suffix] += 1

        return streaks

    # #gets percentage of games won if ahead after 1st quarter, 2nd quarter, etc.
    # def get_perc_win_quarters_ahead(self, data):

    # #gets total goals for and goals against
    # def get_goals_for_against(self, data):

    #determines whether teams win or lose more often if they have a good or bad last 10 games
    def analyze_10_games_win_ratio(self, original_data):
        """Relate each game's result to the form of the ten games before it.

        For every season a dict is built whose keys are the strings "-10",
        "-8", ... "10" (wins minus losses over the previous ten games) and
        whose values are ``[games won, games played]`` for games that
        followed exactly that form. The first ten games of a season only
        fill the window and are not counted themselves.

        Returns one such dict per season, in input order.
        """
        results = []
        for season in original_data:
            #the difference over ten games is always even, hence the step of 2
            buckets = {str(diff): [0, 0] for diff in range(-10, 11, 2)}

            #+1 / -1 for each of the most recent games, oldest first
            window = []
            for idx in range(len(season['other_team'])):
                score = season['game_scores'][idx]
                outcome = 1 if score[0] > score[1] else -1

                #only counts this game once ten earlier results are present
                if len(window) == 10:
                    tally = buckets[str(sum(window))]
                    if outcome == 1:
                        tally[0] += 1
                    tally[1] += 1
                    #make room so the window never exceeds ten results
                    del window[0]

                window.append(outcome)

            results.append(buckets)

        return results

    #gets win-loss ratio during each game during the current season
    def get_current_win_ratio(self, original_data):
        """Track wins minus losses game by game for the last season given.

        Only ``original_data[-1]['game_scores']`` is read. Returns one row
        per game: ``[own score, other score, +1 or -1, running total]``,
        where +1 means the first score was strictly greater than the second.
        """
        latest = original_data[-1]

        rows = []
        running_total = 0
        for score in latest['game_scores']:
            own_points, other_points = score[0], score[1]
            step = 1 if own_points > other_points else -1
            running_total += step
            rows.append([own_points, other_points, step, running_total])
        return rows

    #gets wins-losses while at home or away
    def get_home_away_record(self, original_data):
        """Count wins and losses per season, split by home and away games.

        Returns a dict of four lists with one ``[wins, losses]`` pair per
        season: 'home_record' and 'away_record' cover the whole season,
        'home_10_games' and 'away_10_games' only the last ten games at that
        venue. Games whose venue is neither "home" nor "away" are ignored.
        """
        summary = {
            'home_record': [],
            'away_record': [],
            'home_10_games': [],
            'away_10_games': [],
        }

        def tally(results):
            #[wins, losses] for a list of +1 / -1 results
            return [results.count(1), results.count(-1)]

        for season in original_data:
            venues = season['home_away']
            scores = season['game_scores']

            home_results = []
            away_results = []
            for idx, venue in enumerate(venues):
                if venue == "home":
                    bucket = home_results
                elif venue == "away":
                    bucket = away_results
                else:
                    continue
                bucket.append(1 if scores[idx][0] > scores[idx][1] else -1)

            summary['home_record'].append(tally(home_results))
            summary['away_record'].append(tally(away_results))

            #gets stats on last 10 games
            summary['home_10_games'].append(tally(home_results[-10:]))
            summary['away_10_games'].append(tally(away_results[-10:]))

        return summary

    #calculates a bunch of average points stats
    def get_avg_points(self, original_data):
        """Compute per-season scoring averages.

        Seasons with no games are skipped entirely, so the returned lists
        can be shorter than ``original_data``.

        Returns a dict of lists, one entry per non-empty season:
          'avg_game_points' / 'avg_other_game_points': season averages of
              the first / second score of every game.
          'avg_10_games' / 'avg_other_10_games': the same averages over the
              most recent games, at most ten. Seasons shorter than ten games
              are averaged over the games that exist.
          'avg_quarter_points': ``2 * num_periods`` averages, own periods
              first and then the other team's; periods beyond the league's
              regular number are ignored.
        All values except the period averages are passed through
        ``self.universal.convert_number``.
        """
        avg_game_points = []
        avg_other_game_points = []
        avg_10_games = []
        avg_other_10_games = []
        avg_quarters = []

        for season in original_data:
            num_games = len(season['other_team'])
            if num_games == 0:
                continue

            scores = season['game_scores']

            #season-long averages
            own_total = sum(scores[y][0] for y in range(num_games))
            other_total = sum(scores[y][1] for y in range(num_games))
            avg_game_points.append(
                self.universal.convert_number(own_total / num_games))
            avg_other_game_points.append(
                self.universal.convert_number(other_total / num_games))

            #averages for the last 10 games; the window shrinks to the season
            #length so short seasons neither wrap around to count games twice
            #nor run off the start of the list
            window = min(10, num_games)
            recent = [
                scores[y]
                for y in range(num_games - 1, num_games - 1 - window, -1)
            ]
            avg_10_games.append(
                self.universal.convert_number(
                    sum(game[0] for game in recent) / window))
            avg_other_10_games.append(
                self.universal.convert_number(
                    sum(game[1] for game in recent) / window))

            #per-period totals: own periods first, then the other team's
            num_periods = self.num_periods[self.league]
            period_totals = [0] * num_periods * 2
            for y in range(num_games):
                for side in (0, 1):
                    #best-effort: a game with missing or malformed period
                    #data contributes whatever could be read before the
                    #problem and is otherwise skipped
                    try:
                        periods = season['period_scores'][y][side]
                        for z in range(min(num_periods, len(periods))):
                            period_totals[side * num_periods + z] += int(
                                periods[z])
                    except (IndexError, KeyError, TypeError, ValueError):
                        pass

            avg_quarters.append(
                [total / num_games for total in period_totals])

        to_return = {}
        to_return['avg_game_points'] = avg_game_points
        to_return['avg_other_game_points'] = avg_other_game_points
        to_return['avg_10_games'] = avg_10_games
        to_return['avg_other_10_games'] = avg_other_10_games
        to_return['avg_quarter_points'] = avg_quarters
        return to_return

    #gets records like 2016: 49-20 for all seasons
    def get_seasonal_records(self, original_data):
        """Return ``[wins, losses]`` for every season, in input order.

        A game is a win when its first score is strictly greater than its
        second. Wins are counted over as many games as 'other_team' has
        entries; losses are the length of 'game_scores' minus those wins.
        """
        records = []
        for season in original_data:
            scores = season['game_scores']
            num_wins = sum(
                1 for idx in range(len(season['other_team']))
                if scores[idx][0] > scores[idx][1])
            records.append([num_wins, len(scores) - num_wins])

        return records
Esempio n. 4
0
class Odds_Portal_Scraper:

	#can be nba, nhl, nfl, mlb
	league="nba"


	def __init__(self, league):
		"""Store the lower-cased league and load its helpers and team list.

		league: league name in any letter case, e.g. "nba".
		"""
		league_key=league.lower()
		helpers=Universal_Functions(league_key)

		self.league=league_key
		self.universal=helpers
		self.league_teams=helpers.load_league_teams()





	def scrape_historical_odds(self):
		"""Scrape past results and odds from oddsportal, one CSV per season.

		For every season url returned by get_seasons() the archive pages are
		fetched one after another until a page lists at most one game. Every
		parsed game becomes a row
		[away team, home team, "away-home" score, away odds, home odds]
		and the rows are saved to
		./<league>/oddsportal_odds/odds_<season year>.csv.
		Seasons whose file already exists are skipped, and so are urls that
		do not contain a season year.
		"""
		season_urls=self.get_seasons(self.league)

		for season_url in season_urls:

			try:
				#http://www.oddsportal.com/hockey/usa/nhl-2015-2016/results/
				if self.league=="nba" or self.league=="nhl":
					season_year=season_url.split("-")[2].split("/")[0]
				#http://www.oddsportal.com/baseball/usa/mlb-2015/results/
				else:
					season_year=season_url.split("-")[1].split("/")[0]
			except IndexError:
				#url does not have enough "-" separated parts to hold a year
				print("Invalid season: "+season_url)
				continue

			print("Season year: "+str(season_year))
			path="./"+str(self.league)+"/oddsportal_odds/odds_"+str(season_year)+".csv"
			#season was already saved by an earlier run
			if os.path.exists(path):
				continue

			#gets Page ID needed for url
			data=self.universal.scrape_webpage(season_url)
			#if page timed out
			if data=="":
				time.sleep(10)
				data=self.universal.scrape_webpage(season_url)

			to_find='new PageTournament({"id":"'
			start=data.find(to_find)+len(to_find)
			page_id=data[start : start+data[start:].find('"')]
			print("Season url: "+str(season_url))

			page_num=1
			to_save=[]
			#will break once last page is reached
			while True:
				url="http://fb.oddsportal.com/ajax-sport-country-tournament-archive/3/"+str(page_id)+"/X0/1/0/"+str(page_num)+"/"
				print("Url: "+str(url))

				headers=[('Referer', season_url)]
				data=self.universal.scrape_webpage(url, headers)
				#if page timed out, retry once and keep sending the Referer header
				if data=="":
					time.sleep(10)
					data=self.universal.scrape_webpage(url, headers)

				#removes unnecessary data
				data=data.replace("\\", "")
				data=data.split('{"html":"')[1]

				#splits games
				games=data.split("table-participant")[1:]

				for game in games:
					try:
						#game is in progress (or a play-offs entry), so don't get odds
						if "in-play" in game or "inplay" in game or "play offs" in game.lower():
							continue

						row=[]

						#gets teams; the link is either /<league>/<teams>/ or
						#/<league>-<season>/<teams>/, decided per game
						league_path="/"+self.league+"/"
						if league_path in game:
							start=game.find(league_path)+len(league_path)
							end=game.find('/"')
							teams=game[start:end]
						else:
							to_find="/"+self.league
							start=game.find(to_find)+len(to_find)
							start=start+game[start:].find("/")+1
							end=start+game[start:].find('/"')
							teams=game[start:end]

						#drops the trailing id after the last "-"
						temp_team=teams.split("-")
						temp_team.pop()
						teams="-".join(temp_team)

						home_team=[]
						away_team=[]
						for league_team in self.league_teams:
							#if team is in game
							if league_team[1] in teams:
								#first team listed is home team for some reason
								if teams.index(league_team[1])==0:
									home_team=league_team
								else:
									away_team=league_team

						row.append(away_team[0])
						row.append(home_team[0])

						#gets score
						to_find='table-score">'
						start=game.find(to_find)+len(to_find)
						score=game[start : start+game[start:].find("</td>")]
						#remove OT
						score=score.replace("\xa0OT", "")
						score=score.split(":")

						row.append(score[1]+"-"+score[0])

						#gets odds; the first xodd value is the home side's
						to_find='xodd="'
						start=game.find(to_find)+len(to_find)
						end=game[start:].find('"')+start
						odds_home=self.decode(game[start:end])

						remainder=game[end:]
						start=remainder.find(to_find)+len(to_find)
						end=remainder[start:].find('"')+start
						odds_away=self.decode(remainder[start:end])

						row.append(odds_away)
						row.append(odds_home)

						to_save.append(row)

					except Exception as error:
						#best-effort: a game that cannot be parsed is reported and skipped
						print(error)

				if len(games)<=1:
					break
				page_num+=1

			if len(to_save)>0:
				print(path+" | "+self.league+" | "+str(season_year))
				self.universal.save_to_csv(path, to_save)
			else:
				print("Not enough data to save "+str(season_year))



	def decode(self, odds):
		"""Turn an obfuscated odds string into American odds.

		The letters a, x, c, t, e, o, p stand for the digits 1-7, "z" for
		the decimal point and "f" for the "|" separator; every other
		character is kept as is. Only the value after the first "|" is
		converted, via formatUS().
		"""
		cipher=str.maketrans("axcteopzf", "1234567.|")
		parts=odds.translate(cipher).split("|")
		return self.formatUS(float(parts[1]))

	def formatUS(self, number):
		"""Convert decimal odds to American (moneyline) odds as an int.

		number >= 2 gives the positive line (number-1)*100, any other value
		except 1 gives -100/(number-1), and exactly 1 gives 0.
		The result is rounded to the nearest integer instead of truncated,
		so float error such as (2.3-1)*100 == 129.99999999999997 no longer
		comes out one point too low.
		"""
		if number>=2:
			return int(round((number-1)*100))
		if number!=1:
			return -int(round(100/(number-1)))
		return 0


			






	#gets the results-page url of every season listed on oddsportal for the league
	def get_seasons(self, league):
		"""Return the absolute results-page urls found on the league's results page.

		league: "nba", "nhl" or "mlb"; anything else raises ValueError
			because no results page is known for it.

		The page is fetched with self.universal.scrape_webpage and only the
		part between the PAGE BODY markers is searched (str.index raises
		ValueError when a marker is missing). The text before the first
		link and the first four links are skipped; a page with fewer
		entries yields an empty list.
		"""
		results_pages={
			"nba": "http://www.oddsportal.com/basketball/usa/nba/results/",
			"nhl": "http://www.oddsportal.com/hockey/usa/nhl/results/",
			"mlb": "http://www.oddsportal.com/baseball/usa/mlb/results/",
		}
		if league not in results_pages:
			raise ValueError("No oddsportal results page known for league: "+str(league))

		data=self.universal.scrape_webpage(results_pages[league])

		start=data.index("<!-- PAGE BODY -->")
		end=data[start:].index("<!--  END PAGE BODY -->")

		new_data=data[start : start+end]

		#removes excess form data (leading text plus the first four links)
		links=new_data.split('<strong><a href="')[5:]

		#each remaining piece starts with the relative url of one season
		to_return=[]
		for item in links:
			url="http://www.oddsportal.com"+str(item[:item.find('"')])
			print(url)

			to_return.append(url)

		return to_return