예제 #1
0
def main(session, team_stats_tbl, sched_tbl, graph=False):
    """Create a regression data frame, run a regression through the LinearRegression class, and return the class

    Args:
        session: An instantiated Session object from sqlalchemy
        team_stats_tbl: A mapped team stats table class
        sched_tbl: A mapped schedule table class
        graph: A boolean that creates graphs if true

    Returns:
        A LinearRegression class
    """
    league_year = Config.get_property("league_year")
    graph_dir = Config.get_property("graph_dir")
    if graph and not os.path.exists(graph_dir):
        os.mkdir(graph_dir)

    # Four-factors column names to pull out of the database
    ff_list = four_factors_list()

    regression_df = alt_regression_df(session, team_stats_tbl, sched_tbl,
                                      ff_list)
    print('using alternative/old regression_df')

    # Split the frame into predictors (X: everything except schedule columns)
    # and target (y: margin of victory)
    non_sched_cols = regression_df.columns.drop(
        list(regression_df.filter(regex='sched')))
    predictors = regression_df[non_sched_cols]
    target = regression_df["sched_MOV"]

    ff_reg = LinearRegression(target, predictors)

    # Note: On Windows, graphs will not appear to update
    # To change that, go to folder properties -> customize -> optimize for: Documents
    if graph:
        diagnostic_plots = [
            (ff_reg.predicted_vs_actual, "pred_vs_actual"),
            (ff_reg.residuals_vs_fitted, "residuals_vs_fitted"),
            (ff_reg.qqplot, "qqplot"),
            (ff_reg.influence_plot, "influence"),
            (ff_reg.cooks_distance, "cooks_distance"),
            (ff_reg.residual_independence, "resid_independence"),
        ]
        for plot_method, stem in diagnostic_plots:
            plot_method(out_path=os.path.join(
                graph_dir, "{}_{}.png".format(stem, league_year)))

    # Multicollinearity
    # vif_df = ff_reg.vif()
    # ff_reg.residual_distribution()

    return ff_reg
예제 #2
0
def scrape():
    """Scrape betting line information from Bovada.

    Returns:
        A dict of line data keyed by column name (see odds_for_today), or
        False when no lines are available.
    """
    # Everything after the early return in the original was unreachable dead
    # code (odds-table creation/reconciliation referencing undefined names);
    # it has been removed. The league_year lookup was only used by that dead
    # code, so it is dropped as well.
    lines = odds_for_today()
    if not lines:
        return False
    return lines
예제 #3
0
    def __init__(self, url=None):
        """Initialize macro-level SQLalchemy objects as class attributes (engine, metadata, base).

        A session will allow interaction with the DB.

        Args:
            url: Optional database URL. When falsy, the URL is read from the
                "database" property of Config instead.
        """
        # Removed an unused `file_path = os.getcwd()` local from the original.
        if not url:
            self.path = Config.get_property("database")
        else:
            self.path = url
        # pool_pre_ping checks connections for liveness before handing them out
        self.engine = create_engine(self.path, pool_pre_ping=True)
        self.metadata = MetaData(self.engine)
        self.Base = declarative_base()
예제 #4
0
def main():
    """Create a prediction table from a sample prediction when none exists yet."""
    db = dbinterface.DBInterface()
    session = Session(bind=db.engine)
    league_year = Config.get_property("league_year")

    regression = lm.main(db, session)
    sched_tbl = db.get_table_mappings("sched_{}".format(league_year))

    if db.table_exists("pred"):
        return
    # sample_prediction returns a data manipulator class
    sample = get.sample_prediction(db, session, ref_tbl=sched_tbl,
                                   model=regression)
    create_prediction_table(db, sample, "game_pred_{}".format(league_year))
예제 #5
0
File: all.py  Project: anshu7919/NBApredict
def run_all():
    """Run the entire NBA_bet project."""
    database = DBInterface()
    league_year = Config.get_property("league_year")
    db_session = Session(bind=database.engine)

    # Scrape all data sources for the current league year, then persist
    scraper.scrape_all(database, db_session, league_year)
    db_session.commit()

    # Generate predictions from the freshly scraped data
    bets.predict_all(database, db_session)
    db_session.commit()
    db_session.close()
예제 #6
0
def scrape():
    """Scrape basketball reference for games in a season, parse the output, and return it.

    If the specified year has been completed, it will return every game in the season. If the season is ongoing, it will
    return every game up to the day before the module is run. This ensures only completed games are returned.

    Returns:
        The parsed season schedule with basketball-reference enums converted to strings.
    """
    league_year = Config.get_property("league_year")

    # Fetch the season schedule and normalize enum values to plain strings
    season_data = client.season_schedule(league_year)
    season_data = br_enum_to_string(season_data)
    return season_data
예제 #7
0
def create_table(db, team_stats_data, tbl_name):
    """Create a table of team stats in a database with appropriate foreign keys and constraints.

    Args:
        db: a datotable.database.Database object connected to a database
        team_stats_data: A datatotable.data.DataOperator object with data on NBA team stats
        tbl_name: The desired table name
    ToDo: Currently allows duplicate rows if those values are on different days. Solve with a constraint
    """
    year = Config.get_property('league_year')
    cols = team_stats_data.columns
    # Link each stats row back to the team that produced it
    cols['team_id'].append(ForeignKey("teams_{}.id".format(year)))
    db.map_table(tbl_name=tbl_name,
                 columns=cols,
                 constraints=[UniqueConstraint("team_id", "scrape_time")])
    db.create_tables()
    db.clear_mappers()
예제 #8
0
def predict_all(db):
    """Generate and store predictions for all games available in the odds table.

    Checks if the predictions table exists. If it doesn't, generate a table in
    the database; otherwise update the existing one. Either way, new
    predictions are inserted and the table is refreshed afterwards.
    """
    session = Session(bind=db.engine)
    league_year = Config.get_property("league_year")
    sched_tbl = db.table_mappings["schedule_{}".format(league_year)]
    team_stats_tbl = db.table_mappings['team_stats_{}'.format(league_year)]
    odds_tbl = db.table_mappings['odds_{}'.format(league_year)]

    regression = ff_reg.main(session, team_stats_tbl, sched_tbl)

    pred_tbl_name = "predictions_{}".format(league_year)

    # Bug fix: `results` is consumed below on BOTH paths, but the original only
    # computed it on the else-branch, raising NameError when the table did not
    # exist. Compute it up front instead.
    results = predict_games_in_odds(session, regression, odds_tbl)

    if not db.table_exists(pred_tbl_name):
        # Returns a data operator seeded with a sample prediction
        sample = get_sample_prediction(session, regression, sched_tbl)
        pred_data = predictions.format_data()
        predictions.create_table()
        pred_tbl = db.table_mappings[pred_tbl_name]
        session.add_all([pred_tbl(**row) for row in pred_data.rows])
        session.commit()
    else:
        pred_tbl = db.table_mappings[pred_tbl_name]
        # NOTE(review): this call looks incomplete (trailing comma, no data
        # arguments) — confirm what rows predictions.insert is meant to build.
        # Also removed a dead `schedule_tbl` local that was (incorrectly)
        # mapped to the predictions table name and never used.
        update_rows = predictions.insert(session, )
        session.add_all(update_rows)
        session.commit()

    insert_new_predictions(results, session, pred_tbl, sched_tbl, odds_tbl)

    session.commit()  # Commit here b/c update_prediction_tbl() needs the inserted values

    update_prediction_table(session, pred_tbl, sched_tbl, odds_tbl)
예제 #9
0
def team_statistics(tbl_name):
    """Build a URL for the specified year and return team statistics for the specified table on that page.

    Performance not guaranteed for tables that are not "misc_stats"

    Args:
        tbl_name: The name of the table to be returned

    Returns:
        A dictionary version of the specified table. Keys are column titles that return lists ordered by team.
    """
    url = '{BASE_URL}/leagues/NBA_{year}.html'.format(
        BASE_URL=BASE_URL,  # imported from br_references.py
        year=Config.get_property("league_year"))

    response = requests.get(url=url, allow_redirects=False)
    # Anything outside 2xx is treated as a failed connection
    if not (200 <= response.status_code < 300):
        raise Exception("Could not connect to URL")

    scrape_time = datetime.now()
    # Note that this uses the .content attribute
    return parse_table(response.content, tbl_name, scrape_time)
예제 #10
0
def main(db):
    """Scrape teams, team stats, schedule, and odds, then create or update their database tables.

    Each section follows the same pattern: scrape, wrap the data in a
    DataOperator, then either create-and-fill the table or update it in place.

    Args:
        db: A database interface exposing table_exists, table_mappings, and an engine.
    """
    year = Config.get_property("league_year")
    session = nbapredict.management.Session(bind=db.engine)

    # ~~~~~~~~~~~~~
    # Teams
    # ~~~~~~~~~~~~~
    team_dict = team_scraper.scrape()
    teams_data = DataOperator({"team_name": team_dict["team_name"]})
    teams_tbl_name = "teams_{}".format(year)
    if not db.table_exists(teams_tbl_name):
        teams.create_team_table(db=db,
                                teams_data=teams_data,
                                tbl_name=teams_tbl_name)
        teams_tbl = db.table_mappings[teams_tbl_name]
        session.add_all([teams_tbl(**row) for row in teams_data.rows])
        session.commit()
        del teams_tbl

    # ~~~~~~~~~~~~~
    # Team Stats
    # ~~~~~~~~~~~~~
    team_stats_tbl_name = "team_stats_{}".format(year)
    teams_tbl = db.table_mappings[teams_tbl_name]
    # Re-key scraped team names to their foreign-key ids in the teams table
    team_dict['team_id'] = team_dict.pop('team_name')
    team_dict['team_id'] = convert.values_to_foreign_key(
        session=session,
        foreign_tbl=teams_tbl,
        foreign_key="id",
        foreign_value="team_name",
        child_data=team_dict['team_id'])
    # When team_stats_tbl is created, the teams_tbl automap object is changed. The changed format does not follow
    # the expected behavior of an automapped table. I suspect this is because a relationship is established.
    # If we reloaded, teams_tbl works fine. Therefore, delete the variable here for now
    del teams_tbl
    # Derive a date-only column from the scrape timestamps
    team_dict['scrape_date'] = [
        datetime.date(s_time) for s_time in team_dict['scrape_time']
    ]
    team_stats_data = DataOperator(team_dict)
    if not db.table_exists(team_stats_tbl_name):
        team_stats.create_table(db=db,
                                team_stats_data=team_stats_data,
                                tbl_name=team_stats_tbl_name)
        team_stats_tbl = db.table_mappings[team_stats_tbl_name]
        session.add_all(
            [team_stats_tbl(**row) for row in team_stats_data.rows])
        session.commit()
    else:
        team_stats_tbl = db.table_mappings[team_stats_tbl_name]
        team_stats.insert(session, team_stats_tbl, team_stats_data)

    # ~~~~~~~~~~~~~
    # Schedule
    # ~~~~~~~~~~~~~
    schedule_dict = season_scraper.scrape()
    schedule_data = DataOperator(schedule_dict)
    # Re-fetch teams_tbl (it was deleted above after the automap issue)
    teams_tbl = db.table_mappings['teams_{}'.format(year)]
    schedule_data = schedule.format_data(session=session,
                                         schedule_data=schedule_data,
                                         team_tbl=teams_tbl,
                                         team_stats_tbl=team_stats_tbl)
    schedule_tbl_name = "schedule_{}".format(year)
    if not db.table_exists(schedule_tbl_name):
        schedule.create_table(db, schedule_data, schedule_tbl_name, teams_tbl,
                              team_stats_tbl)
        schedule_tbl = db.table_mappings[schedule_tbl_name]
        session.add_all([schedule_tbl(**row) for row in schedule_data.rows])
        session.commit()
    else:
        schedule_tbl = db.table_mappings[schedule_tbl_name]
        update_rows = schedule.update_table(session, schedule_data,
                                            schedule_tbl, team_stats_tbl)
        session.add_all(update_rows)
        session.commit()

    # ~~~~~~~~~~~~~
    # Odds
    # ~~~~~~~~~~~~~
    # line_scraper.scrape() may return a falsy value when no lines are available
    odds_dict = line_scraper.scrape()
    odds_data = None
    if odds_dict:
        odds_dict = odds.format_data(session, odds_dict, teams_tbl,
                                     schedule_tbl)
        odds_data = DataOperator(odds_dict)
    # Evaluate if you have the correct columns in odds_data (i.e. home\away team id's)
    odds_tbl_name = "odds_{}".format(year)
    if not db.table_exists(odds_tbl_name) and odds_data:
        odds.create_table(db, odds_tbl_name, odds_data, schedule_tbl)
        odds_tbl = db.table_mappings[odds_tbl_name]
        session.add_all(odds_tbl(**row) for row in odds_data.rows)
        session.commit()
    elif odds_data:
        # Table already exists: add the fresh rows, reconcile, and prune
        odds_tbl = db.table_mappings[odds_tbl_name]
        session.add_all(odds_tbl(**row) for row in odds_data.rows)
        session.commit()
        odds.update_table(session, odds_tbl, odds_data)
        session.commit()
        odds.delete(session, odds_tbl)

    session.close()
예제 #11
0
    # ~~~~~~~~~~~~~
    # Odds
    # ~~~~~~~~~~~~~
    odds_dict = line_scraper.scrape()
    odds_data = None
    if odds_dict:
        odds_dict = odds.format_data(session, odds_dict, teams_tbl,
                                     schedule_tbl)
        odds_data = DataOperator(odds_dict)
    # Evaluate if you have the correct columns in odds_data (i.e. home\away team id's)
    odds_tbl_name = "odds_{}".format(year)
    if not db.table_exists(odds_tbl_name) and odds_data:
        odds.create_table(db, odds_tbl_name, odds_data, schedule_tbl)
        odds_tbl = db.table_mappings[odds_tbl_name]
        session.add_all(odds_tbl(**row) for row in odds_data.rows)
        session.commit()
    elif odds_data:
        odds_tbl = db.table_mappings[odds_tbl_name]
        session.add_all(odds_tbl(**row) for row in odds_data.rows)
        session.commit()
        odds.update_table(session, odds_tbl, odds_data)
        session.commit()
        odds.delete(session, odds_tbl)

    session.close()


if __name__ == "__main__":
    # Connect to the "test" database in the configured outputs directory and run the pipeline
    db = Database("test", Config.get_property("outputs"))
    main(db)
예제 #12
0
def odds_for_today():
    """Scrape Bovada for betting odds on upcoming NBA games.

    Returns:
        A dictionary mapping column names to lists of values (one entry per
        game), or None if Bovada could not be reached or returned no games.
    """
    scrape_time = datetime.now()

    # Check for response from Bovada; fall back to the playoff URL
    url = Config.get_property("regularURL")
    response = bovada_json_request(url)
    if not response:
        url = Config.get_property("playoffURL")
        response = bovada_json_request(url)
        if not response:
            return None

    # Move down tree towards games
    events = response[0]["events"]

    # Strip games from the 'event's object (which holds a bunch of random information)
    bovada_games = [
        e for e in events
        if e['description'].count('@') > 0 and e['type'] == 'GAMEEVENT'
    ]
    if not bovada_games:
        return None

    # Set-up the line dictionary which stores data in the correct table format
    lines = {
        "home_team": [],
        "away_team": [],
        'start_time': [],
        "spread": [],
        "home_spread_price": [],
        "away_spread_price": [],
        "home_moneyline": [],
        "away_moneyline": [],
        "scrape_time": []
    }

    # Iterate through each game returned by bovada and store its information
    for game in bovada_games:
        # The game's start time is embedded as digits in the last link segment
        link = game['link'].split('-')
        link = link[len(link) - 1]
        str_time = re.findall('[0-9]', link)
        start_time = ''.join(str_time)
        start_time = datetime.strptime(start_time, "%Y%m%d%H%M")
        if datetime.now() > start_time:
            # An ongoing game will not have the correct betting data. We don't want to store this information
            print(
                "This game ({}) is either ongoing or completed. Not scraping".
                format(game['description']))
            continue

        home_team, away_team = parse_teams(game["competitors"])

        # Get only the full match betting information from the game object
        betting_info = game["displayGroups"][0]["markets"]
        full_match_bets = [
            bet for bet in betting_info
            if bet["period"]["description"] == "Match"
        ]

        # Bug fix: initialize every bet value per game. The original left the
        # spread variables unbound when a game had no "Point Spread" bet
        # (NameError on the first game; stale values from the previous game
        # afterwards). Empty strings from the parsers are also mapped to None.
        home_moneyline = away_moneyline = None
        spread = home_spread_price = away_spread_price = None
        for bet in full_match_bets:
            if bet["description"] == "Moneyline":
                home_moneyline, away_moneyline = parse_moneyline(bet)
                if home_moneyline == "":
                    home_moneyline = None
                if away_moneyline == "":
                    away_moneyline = None
            elif bet["description"] == "Point Spread":
                spread, home_spread_price, away_spread_price = parse_spread(
                    bet)
                if spread == "":
                    spread = None
                if home_spread_price == "":
                    home_spread_price = None
                if away_spread_price == "":
                    away_spread_price = None

        game_lines = [
            home_team, away_team, start_time, spread, home_spread_price,
            away_spread_price, home_moneyline, away_moneyline, scrape_time
        ]

        # This section depends on python 3.7+ to preserve the order of dict keys in lines
        for key, value in zip(lines, game_lines):
            lines[key].append(value)
    return lines
예제 #13
0
    # To change that, go to folder properties -> customize -> optimize for: Documents
    if graph:
        ff_reg.predicted_vs_actual(out_path=os.path.join(
            graph_dir, "pred_vs_actual_{}.png".format(league_year)))
        ff_reg.residuals_vs_fitted(out_path=os.path.join(
            graph_dir, "residuals_vs_fitted_{}.png".format(league_year)))
        ff_reg.qqplot(out_path=os.path.join(
            graph_dir, "qqplot_{}.png".format(league_year)))
        ff_reg.influence_plot(out_path=os.path.join(
            graph_dir, "influence_{}.png".format(league_year)))
        ff_reg.cooks_distance(out_path=os.path.join(
            graph_dir, "cooks_distance_{}.png".format(league_year)))
        ff_reg.residual_independence(out_path=os.path.join(
            graph_dir, "resid_independence_{}.png".format(league_year)))

    # Multicollinearity
    # vif_df = ff_reg.vif()
    # ff_reg.residual_distribution()

    return ff_reg


if __name__ == "__main__":
    db = Database('test', "../management")
    session = Session(db.engine)
    year = Config.get_property('league_year')
    sched_tbl = db.table_mappings['schedule_{}'.format(year)]
    team_stats_tbl = db.table_mappings['team_stats_{}'.format(year)]
    # Bug fix: main() requires team_stats_tbl and sched_tbl; they were built
    # above but never passed, so the original call raised a TypeError.
    test = main(session, team_stats_tbl, sched_tbl, graph=True)
    t = 2  # breakpoint anchor for interactive debugging