def test_foreign_key_constraints(self, database, session, sample_data_operator):
    """Test that foreign key constraints are enforced by inserting valid data and then
    expecting an error on invalid data."""
    parent_cols = sample_data_operator.columns
    database.map_table("parent_tbl", parent_cols)
    database.create_tables()
    database.clear_mappers()
    parent_tbl = database.table_mappings["parent_tbl"]
    parent_rows = sample_data_operator.rows
    session.add_all([parent_tbl(**row) for row in parent_rows])
    session.commit()

    child_data = DataOperator({"fk_id": [1, 22]})
    child_cols = child_data.columns
    child_cols['fk_id'].append(ForeignKey('parent_tbl.id'))
    database.map_table('child_tbl', child_cols)
    database.create_tables()
    database.clear_mappers()
    child_tbl = database.table_mappings['child_tbl']
    rows = child_data.rows

    # Inserts 1, which exists in parent_tbl.id; should not raise an error
    session.add(child_tbl(**rows[0]))
    session.commit()

    # Inserts 22, which does not exist in parent_tbl.id; should raise an IntegrityError
    session.add(child_tbl(**rows[1]))
    with pytest.raises(IntegrityError):
        session.commit()
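# Note: this test assumes the backing engine actually enforces foreign keys. SQLite, for
# example, ships with foreign key enforcement disabled, so a test database built on SQLite
# would need the standard SQLAlchemy listener below. This is a sketch; the engine setup in
# this project may already handle it differently.
from sqlalchemy import event
from sqlalchemy.engine import Engine


@event.listens_for(Engine, "connect")
def _enable_sqlite_foreign_keys(dbapi_connection, connection_record):
    """Turn on foreign key enforcement for every new SQLite connection."""
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()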
def test_list_row_generator(self, sample_row_data):
    """Assert that rows are correctly formatted into a list of dictionaries when
    DataOperator is passed a list."""
    data = DataOperator(sample_row_data)
    rows = data.rows
    assert isinstance(rows, list)
    assert rows[0] == {'strings': 'hi', 'ints': 1, 'floats': 1.1, 'dates': datetime(2019, 1, 1)}
    assert rows[1] == {'strings': 'world', 'ints': 2, 'floats': 2.2, 'dates': datetime(2019, 1, 2)}
    assert rows[2] == {'strings': 'bye', 'ints': 3, 'floats': 3.3, 'dates': datetime(2019, 1, 3)}
    assert rows[3] == {'strings': 'school', 'ints': 4, 'floats': 4.4444, 'dates': datetime(2019, 1, 4)}
def get_sample_prediction(session, regression):
    """Generate and return a sample prediction formatted specifically for table creation.

    Args:
        session: A SQLalchemy session object
        regression: A regression object from four_factor_regression.py

    Returns:
        A DataOperator object initialized with a prediction from regression
    """
    one_row_dataframe = regression.predictors.loc[[0]]
    sample_prediction = predict_game(session, regression, one_row_dataframe)
    data = DataOperator(sample_prediction)
    return data
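# A sketch of how the sample prediction could seed table creation, following the
# map_table/create_tables pattern used in the tests above. The table name is an assumption,
# and database, session, and regression are assumed to already exist, so the calls are
# shown commented out:
# sample_prediction = get_sample_prediction(session, regression)
# database.map_table("predictions_2019", sample_prediction.columns)
# database.create_tables()
# database.clear_mappers()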
def test_dict_column_generator(self, sample_dict_data):
    """Assert that columns reflect the expected SQLalchemy column type when DataOperator
    is passed a dictionary."""
    data = DataOperator(sample_dict_data)
    columns = data.columns
    assert columns['strings'] == [String], "Incorrect SQLalchemy type returned by DataOperator.columns"
    assert columns['ints'] == [Integer], "Incorrect SQLalchemy type returned by DataOperator.columns"
    assert columns['floats'] == [Float], "Incorrect SQLalchemy type returned by DataOperator.columns"
    assert columns['dates'] == [DateTime], "Incorrect SQLalchemy type returned by DataOperator.columns"
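# A sketch of what the sample_dict_data fixture plausibly returns, inferred from the
# column-type assertions above and the row values asserted in test_list_row_generator;
# the actual fixture in this project may differ:
from datetime import datetime

import pytest


@pytest.fixture
def sample_dict_data():
    """Column-oriented sample data: each key maps to a list of column values."""
    return {
        'strings': ['hi', 'world', 'bye', 'school'],
        'ints': [1, 2, 3, 4],
        'floats': [1.1, 2.2, 3.3, 4.4444],
        'dates': [datetime(2019, 1, 1), datetime(2019, 1, 2),
                  datetime(2019, 1, 3), datetime(2019, 1, 4)],
    }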
def predict_games_on_date(database, session, league_year, date, console_out):
    """Predict games on the specified date and write the results to the database.

    TODO: Settle naming: "on day" versus "on date"?

    Args:
        database: An instantiated DBInterface class from dbinterface.py
        session: A SQLAlchemy session object for queries and writes
        league_year: The league year to work with. For example, the league year of the
            2018-19 season is 2019
        date: Either a datetime.date or a dictionary formatted as
            {"day": day, "month": month, "year": year}
        console_out: If True, prints prediction results to the console
    """
    # Accept either a date/datetime object or a {"day", "month", "year"} dictionary
    if isinstance(date, dict):
        date = datetime(date["year"], date["month"], date["day"])

    # Get lines for the games
    odds_tbl = database.get_table_mappings(["odds_{}".format(league_year)])
    games_query = getters.get_spreads_for_date(odds_tbl, session, date)
    game_spreads = [game for game in games_query]

    results = predict_games_on_day(database, session, game_spreads, console_out=console_out)

    data = DataOperator(results)
    sched_tbl = database.get_table_mappings("sched_{}".format(league_year))
    pred_tbl = database.get_table_mappings("predictions_{}".format(league_year))

    # Results are already in row format, so pass data.data rather than data.dict_to_rows()
    try:
        insert_predictions(data.data, session, pred_tbl, sched_tbl, odds_tbl)
        session.commit()
    except IntegrityError:
        session.rollback()
        update_prediction_table(session, pred_tbl, sched_tbl, odds_tbl)
        session.commit()
    finally:
        session.close()
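# The date argument accepts either a datetime (or datetime.date) or a
# {"day", "month", "year"} dictionary; the two call forms below are equivalent.
# Construction of database and session is omitted here, so the calls are shown commented out:
from datetime import datetime

date_as_dict = {"day": 15, "month": 3, "year": 2019}
date_as_datetime = datetime(2019, 3, 15)
# predict_games_on_date(database, session, 2019, date_as_dict, console_out=True)
# predict_games_on_date(database, session, 2019, date_as_datetime, console_out=True)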
def main(db):
    year = Config.get_property("league_year")
    session = nbapredict.management.Session(bind=db.engine)

    # ~~~~~~~~~~~~~
    # Teams
    # ~~~~~~~~~~~~~
    team_dict = team_scraper.scrape()
    teams_data = DataOperator({"team_name": team_dict["team_name"]})
    teams_tbl_name = "teams_{}".format(year)
    if not db.table_exists(teams_tbl_name):
        teams.create_team_table(db=db, teams_data=teams_data, tbl_name=teams_tbl_name)
        teams_tbl = db.table_mappings[teams_tbl_name]
        session.add_all([teams_tbl(**row) for row in teams_data.rows])
        session.commit()
        del teams_tbl

    # ~~~~~~~~~~~~~
    # Team Stats
    # ~~~~~~~~~~~~~
    team_stats_tbl_name = "team_stats_{}".format(year)
    teams_tbl = db.table_mappings[teams_tbl_name]
    team_dict['team_id'] = team_dict.pop('team_name')
    team_dict['team_id'] = convert.values_to_foreign_key(
        session=session, foreign_tbl=teams_tbl, foreign_key="id",
        foreign_value="team_name", child_data=team_dict['team_id'])
    # When team_stats_tbl is created, the teams_tbl automap object changes and no longer
    # behaves like a standard automapped table, likely because a relationship is established.
    # Reloading teams_tbl restores the expected behavior, so delete the variable here for now.
    del teams_tbl

    team_dict['scrape_date'] = [datetime.date(s_time) for s_time in team_dict['scrape_time']]
    team_stats_data = DataOperator(team_dict)
    if not db.table_exists(team_stats_tbl_name):
        team_stats.create_table(db=db, team_stats_data=team_stats_data, tbl_name=team_stats_tbl_name)
        team_stats_tbl = db.table_mappings[team_stats_tbl_name]
        session.add_all([team_stats_tbl(**row) for row in team_stats_data.rows])
        session.commit()
    else:
        team_stats_tbl = db.table_mappings[team_stats_tbl_name]
        team_stats.insert(session, team_stats_tbl, team_stats_data)

    # ~~~~~~~~~~~~~
    # Schedule
    # ~~~~~~~~~~~~~
    schedule_dict = season_scraper.scrape()
    schedule_data = DataOperator(schedule_dict)
    teams_tbl = db.table_mappings['teams_{}'.format(year)]
    schedule_data = schedule.format_data(session=session, schedule_data=schedule_data,
                                         team_tbl=teams_tbl, team_stats_tbl=team_stats_tbl)
    schedule_tbl_name = "schedule_{}".format(year)
    if not db.table_exists(schedule_tbl_name):
        schedule.create_table(db, schedule_data, schedule_tbl_name, teams_tbl, team_stats_tbl)
        schedule_tbl = db.table_mappings[schedule_tbl_name]
        session.add_all([schedule_tbl(**row) for row in schedule_data.rows])
        session.commit()
    else:
        schedule_tbl = db.table_mappings[schedule_tbl_name]
        update_rows = schedule.update_table(session, schedule_data, schedule_tbl, team_stats_tbl)
        session.add_all(update_rows)
        session.commit()

    # ~~~~~~~~~~~~~
    # Odds
    # ~~~~~~~~~~~~~
    odds_dict = line_scraper.scrape()
    odds_data = None
    if odds_dict:
        odds_dict = odds.format_data(session, odds_dict, teams_tbl, schedule_tbl)
        odds_data = DataOperator(odds_dict)
        # TODO: Evaluate whether odds_data has the correct columns (i.e. home/away team ids)
    odds_tbl_name = "odds_{}".format(year)
    if not db.table_exists(odds_tbl_name) and odds_data:
        odds.create_table(db, odds_tbl_name, odds_data, schedule_tbl)
        odds_tbl = db.table_mappings[odds_tbl_name]
        session.add_all(odds_tbl(**row) for row in odds_data.rows)
        session.commit()
    elif odds_data:
        odds_tbl = db.table_mappings[odds_tbl_name]
        session.add_all(odds_tbl(**row) for row in odds_data.rows)
        session.commit()
        odds.update_table(session, odds_tbl, odds_data)
        session.commit()
        odds.delete(session, odds_tbl)

    session.close()
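# A minimal entry-point sketch for running the ETL above; the DBInterface import path is
# an assumption and may not match the actual project layout:
if __name__ == "__main__":
    from nbapredict.database.dbinterface import DBInterface  # hypothetical import path
    main(DBInterface())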
def sample_data_operator(sample_dict_data):
    """Return a DataOperator built from the sample_dict_data fixture."""
    data = DataOperator(sample_dict_data)
    return data