Example #1
def get_raw_data():
    sql_cols = [
        f"oa_{prefix}_{m}"
        for m in OUTPUT_METRIC_DICT.keys()
        for prefix in ["tm", "opp"]
    ]

    tm1_sql_cols = [f"{x} as tm1_{x}" for x in sql_cols]
    tm2_sql_cols = [f"{x} as tm2_{x}" for x in sql_cols]
    q = f"""
    select
        g.date
        ,g.season
        ,g.game_key
        ,g.tm_teamid
        ,g.opp_teamid
        ,g.tm_pts
        ,g.opp_pts
        ,g.tm_win
        ,v.tm_closeline
        ,v.tm_ml
        ,td1.{', td1.'.join(tm1_sql_cols)}
        ,td2.{', td2.'.join(tm2_sql_cols)}
    from games g
    left join teamdates td1
        on g.date = td1.date
        and g.tm_teamid = td1.teamid
    left join teamdates td2
        on g.date = td2.date
        and g.opp_teamid = td2.teamid
    left join calendar c
        on c.date = g.date
    left join vegas_lines v
        on g.season = v.season
        and g.date = v.date
        and g.game_key = v.game_key
        and g.tm_teamid = v.tm_teamid
    where c.day_num >= 60
    order by g.date, g.game_key, g.tm_teamid
    -- limit 1000
    """
    raw_td = readSql(q)
    raw_td.dropna(
        how="any",
        subset=[col for col in raw_td.columns if col[:3] in ["tm1", "tm2"]],
        inplace=True,
    )
    return raw_td
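These excerpts lean on two module-level names that are never shown. Judging from how they are indexed in Example #5 (metric_details["num_col"], ["den_col"], ["ascending"]), they plausibly look like the sketch below; the specific metric entries are hypothetical placeholders.

# Assumed module-level config (not shown in the excerpts). Each output metric
# is the ratio num_col / den_col plus a rank-direction flag; these two entries
# are hypothetical examples only.
OUTPUT_METRIC_DICT = {
    "ppp": {"num_col": "pts", "den_col": "poss", "ascending": False},
    "orpct": {"num_col": "or", "den_col": "availor", "ascending": False},
}

# Maps each side's column prefix to the other side's; used wherever a stat
# needs to reference the opposing team's columns.
OTHERPREFIXMAP = {"tm": "opp", "opp": "tm"}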
Example #2
def load_calendar():
    "Relies on games to be loaded into database"

    q = """
    select
        season
        ,min(date) as min
        ,max(date) as max
    from games
    group by season
    """
    seasonGameDates = readSql(q)
    season_list: list[int] = []
    dates: list[str] = []
    day_nums: list[int] = []
    # iterrows yields (index, row); take the season from the row, not the index
    for _, series in seasonGameDates.iterrows():
        season_dates = list(
            pd.date_range(series["min"], series["max"], freq="D").strftime("%Y-%m-%d")
        )
        dates += season_dates
        season_list += [series["season"]] * len(season_dates)
        day_nums += [x + 1 for x in range(len(season_dates))]
    cal = pd.DataFrame({"season": season_list, "date": dates, "day_num": day_nums})

    # Drop old, make table, add indexes, load data
    executeSql("drop table if exists calendar")
    q = f"""
    create table calendar (
        season integer not null,
        date TEXT not null,
        day_num integer not null,
        primary key (season asc, date asc)
    )
    """
    executeSql(q)
    for p in get_unique_permutations(cal.columns):
        executeSql(f"CREATE INDEX calendar_{'_'.join(p)} ON calendar ({', '.join(p)})")
    dfToTable(cal, "calendar", "ncaa.db", ifExists="append")
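get_unique_permutations is another helper that never appears in the excerpts. Since its output feeds CREATE INDEX statements named after the joined column order, one plausible implementation (an assumption, not the repo's confirmed code) is every distinct ordering of the column list:

from itertools import permutations
from typing import Iterable


def get_unique_permutations(cols: Iterable[str]) -> list[tuple[str, ...]]:
    # Every distinct ordering of the full column list, so a covering index
    # exists for whichever column order a query filters on. The real helper
    # may differ, e.g. by also emitting shorter prefixes.
    return sorted(set(permutations(cols)))

For calendar's three columns this yields six orderings, hence six indexes.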
Example #3
def load_games():
    "Depends on teams"

    # Read in data
    seasons = pd.read_csv("./data/MSeasons.csv")
    games = pd.read_csv("./data/MRegularSeasonDetailedResults.csv")

    # Normalize col names
    games.columns = [x.lower() for x in games.columns]
    seasons.columns = [x.lower() for x in seasons.columns]

    # Get game dates
    seasons["dayzero"] = pd.to_datetime(seasons["dayzero"])
    games = games.merge(seasons[["season", "dayzero"]], on="season")
    games["date"] = games.apply(
        lambda row: (row["dayzero"] + timedelta(days=row["daynum"])).strftime(
            "%Y-%m-%d"
        ),
        axis=1,
    )
    del games["dayzero"], games["daynum"]

    # Rename columns
    rename_map = {"w": "tm", "l": "opp"}
    rename = {"numot": "num_ot", "wscore": "tm_pts", "lscore": "opp_pts"}
    for col in [
        "teamid",
        "loc",
        "fgm",
        "fga",
        "fgm3",
        "fga3",
        "ftm",
        "fta",
        "or",
        "dr",
        "ast",
        "to",
        "stl",
        "blk",
        "pf",
    ]:
        for prefix in ["w", "l"]:
            rename[f"{prefix}{col}"] = f"{rename_map[prefix]}_{col}"
    games.rename(columns=rename, inplace=True)
    games["opp_loc"] = np.where(
        games["tm_loc"] == "N", "N", np.where(games["tm_loc"] == "A", "H", "A")
    )

    # Add game key
    teams = readSql("select * from teams")
    teams_dict = dict(zip(teams["teamid"], teams["teamname"]))
    games["game_key"] = games.apply(
        lambda x: f"{teams_dict[x['tm_teamid']]}>{teams_dict[x['opp_teamid']]}", axis=1
    )

    # Add additional columns
    for prefix in ["tm", "opp"]:
        games[f"{prefix}_fga2"] = games[f"{prefix}_fga"] - games[f"{prefix}_fga3"]
        games[f"{prefix}_fgm2"] = games[f"{prefix}_fgm"] - games[f"{prefix}_fgm3"]
        games[f"{prefix}_tr"] = games[f"{prefix}_or"] + games[f"{prefix}_dr"]
        games[f"{prefix}_mins"] = games[f"{prefix}_mins"] = 40 + games["num_ot"] * 5
        games[f"{prefix}_game"] = 1
        games[f"{prefix}_win"] = 1 * (
            games[f"{prefix}_pts"] > games[f"{OTHERPREFIXMAP[prefix]}_pts"]
        )
        games[f"{prefix}_loss"] = games[f"{prefix}_game"] - games[f"{prefix}_win"]
        # games[f"{prefix}_rsgame"] = 1
        games[f"{prefix}_poss"] = (
            (
                games["tm_fga"]
                + 0.4 * games["tm_fta"]
                - 1.07
                * (games["tm_or"] / (games["tm_or"] + games["opp_dr"]))
                * (games["tm_fga"] - games["tm_fgm"])
                + games["tm_to"]
            )
            + (
                games["opp_fga"]
                + 0.4 * games["opp_fta"]
                - 1.07
                * (games["opp_or"] / (games["opp_or"] + games["tm_dr"]))
                * (games["opp_fga"] - games["opp_fgm"])
                + games["opp_to"]
            )
        ) * 0.5
        games[f"{prefix}_margin"] = (
            games[f"{prefix}_pts"] - games[f"{OTHERPREFIXMAP[prefix]}_pts"]
        )
        games[f"{prefix}_availor"] = (
            games[f"{prefix}_or"] + games[f"{OTHERPREFIXMAP[prefix]}_dr"]
        )
    del games["num_ot"]

    # Duplicate and rename
    def rename_col(col):
        if fnmatch(col, "tm_*"):
            return f"opp_{col[3:]}"
        elif fnmatch(col, "opp_*"):
            return f"tm_{col[4:]}"
        else:
            return col

    dup_games = games.copy()
    dup_games.columns = [rename_col(col) for col in dup_games.columns]
    games = pd.concat([games, dup_games], ignore_index=True)
    games = games.sort_values(by=["date", "game_key"]).reset_index(drop=True)

    # Reorder columns for easier analysis
    first_cols = [
        "date",
        "season",
        "game_key",
        "tm_teamid",
        "opp_teamid",
        "tm_loc",
        "opp_loc",
        "tm_pts",
        "opp_pts",
    ]
    remainder_cols = sorted([x for x in games.columns if x not in first_cols])
    games = games[first_cols + remainder_cols]

    # TODO add postseason

    # Drop old, set up new, add indexes, load data
    executeSql("drop table if exists games")
    q = f"""
    create table games (
        season integer not null,
        date TEXT not null,
        game_key TEXT not null,
        tm_teamid integer not null,
        opp_teamid integer not null,
        tm_loc TEXT not null,
        opp_loc TEXT not null,
        tm_pts integer not null,
        opp_pts integer not null,
        {' real not null, '.join(remainder_cols)} real not null,
        primary key (season, date asc, game_key asc, tm_teamid asc)
    )
    """
    executeSql(q)

    perms = get_unique_permutations(
        ["date", "season", "game_key", "tm_teamid", "opp_teamid"]
    )
    log.info(f"Creating {len(perms)} indexes on games")
    for p in perms:
        executeSql(f"CREATE INDEX games_{'_'.join(p)} on games ({', '.join(p)})")
    dfToTable(
        games,
        table="games",
        db="ncaa.db",
        ifExists="append",
    )
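The docstrings in Examples #2 and #3 imply a load order: teams before games, games before calendar. A minimal driver sketch under that assumption (the repo's actual entry point is not shown):

if __name__ == "__main__":
    # load_teams() is assumed to have run already (not shown in these excerpts)
    load_games()     # "Depends on the teams table"
    load_calendar()  # "Relies on games being loaded into the database"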
Example #4
def single_season_vegas_load(season: int):
    df = pd.read_excel(
        f"https://www.sportsbookreviewsonline.com/scoresoddsarchives/ncaabasketball/ncaa%20basketball%20{season - 1}-{str(season)[-2:]}.xlsx"
    )
    df.columns = [x.lower() for x in df.columns]

    # Drop bad dates - integer date value must be >= 100
    pre_drop_len = len(df)
    bad_dates = df["date"] < 100
    log.debug(f"Dropping {sum(bad_dates) * 2:,.0f} bad dated records")
    idx_to_drop = []
    for idx, _ in df.loc[bad_dates].iterrows():
        idx_to_drop.append(idx)
        if idx % 2 == 1:  # If odd, drop even number prior
            idx_to_drop.append(idx - 1)
        else:  # If even, drop next number
            idx_to_drop.append(idx + 1)
    df = df.drop(idx_to_drop).reset_index(drop=True)
    assert len(df) == pre_drop_len - sum(bad_dates) * 2, "missing a date or two"

    # Parse date from integer date column
    df[["mo", "dy"]] = df["date"].apply(lambda x: [str(x)[:-2], str(x)[-2:]]).to_list()
    df["year"] = np.where(df["mo"].astype(int) < 7, str(season), str(season - 1))
    df["date"] = pd.to_datetime(
        df["year"] + "-" + df["mo"] + "-" + df["dy"]
    ).dt.strftime("%Y-%m-%d")
    df.drop(inplace=True, labels=["year", "mo", "dy"], axis=1)

    # Self-join to get all game data in one row
    df = pd.merge(
        df.iloc[[x for x in df.index if x % 2 == 0]].reset_index(drop=True),
        df.iloc[[x for x in df.index if x % 2 == 1]].reset_index(drop=True),
        left_index=True,
        right_index=True,
        suffixes=["_tm", "_opp"],
    )
    preLen = len(df)
    df = df.loc[df["date_tm"] == df["date_opp"]].reset_index(drop=True)
    df = df.loc[np.abs((df["rot_tm"] - df["rot_opp"])) == 1].reset_index(drop=True)
    if len(df) != preLen:
        log.debug(f"Dropped {preLen - len(df)} records due to date/rot mismatch")
    assert (df["date_tm"] == df["date_opp"]).all(), "Mismatch dates"
    assert np.abs((df["rot_tm"] - df["rot_opp"])).max() == 1, "Mismatch rots"

    # Remove unused columns, rename some things
    df.drop(
        ["date_opp", "rot_tm", "rot_opp"]
        + [f"{x}_{y}" for y in ["tm", "opp"] for x in ["1st", "2nd", "2h"]],
        axis=1,
        inplace=True,
    )
    df.rename(columns={"date_tm": "date"}, inplace=True)

    # Get team IDs
    ts = pd.read_csv("data/MTeamSpellings.csv", sep=",", encoding="cp1252")
    ts.columns = [x.lower() for x in ts.columns]
    ts["teamnamespelling_nospace"] = ts["teamnamespelling"].str.replace(" ", "")
    ts = ts.groupby(["teamid", "teamnamespelling_nospace"]).size().reset_index()
    for col in ["team_tm", "team_opp"]:
        df[col] = df[col].str.lower()
        df[col] = df[col].str.replace(".", "", regex=False)
        df[col] = df[col].str.replace("\xa0", "", regex=False)

    df = (
        pd.merge(
            df,
            ts[["teamnamespelling_nospace", "teamid"]],
            left_on=["team_tm"],
            right_on=["teamnamespelling_nospace"],
            how="left",
        )
        .rename(columns={"teamid": "tm_teamid"})
        .drop(["teamnamespelling_nospace"], axis=1)
        .merge(
            ts[["teamnamespelling_nospace", "teamid"]],
            left_on=["team_opp"],
            right_on=["teamnamespelling_nospace"],
            how="left",
        )
        .rename(columns={"teamid": "opp_teamid"})
        .drop(["teamnamespelling_nospace"], axis=1)
    )
    # TODO handle unmatched team name spellings rather than just surfacing them
    unmatched_spellings = sorted(
        set(
            list(df.loc[pd.isna(df["tm_teamid"])]["team_tm"].unique())
            + list(df.loc[pd.isna(df["opp_teamid"])]["team_opp"].unique())
        )
    )
    log.debug(f"Unmatched team spellings: {unmatched_spellings}")

    # Drop missing teams
    preLen = len(df)
    df = df.loc[(~pd.isna(df["tm_teamid"])) & (~pd.isna(df["opp_teamid"]))].reset_index(
        drop=True
    )
    log.debug(
        f"Dropped {preLen - len(df)} records due to missing names {(preLen - len(df)) / preLen:.2%}"
    )

    # Figure out open and close
    for col in ["open_tm", "close_tm", "open_opp", "close_opp"]:
        df[col] = np.where(df[col].str.lower().isin(["pk", "p"]), 0, df[col])

    # Drop the NL values (no line)
    # preLen = len(df)
    # df = df.loc[
    #     ~(df[["open_tm", "close_tm", "open_opp", "close_opp"]] == "NL").any(axis=1)
    # ].reset_index(drop=True)
    # print(
    #     f"Dropped {preLen - len(df)} records due to NL vals {(preLen - len(df)) / preLen:.2%}"
    # )

    def get_line(r):
        "Returns (tm_openline, tm_closeline, game_openou, game_closeou, tm_ml, opp_ml)"

        def _get_val_type(val: str | float) -> Literal["line", "ou", "NL"]:
            # Any string value (e.g. "NL") means no usable number was posted
            if isinstance(val, str):
                return "NL"
            # Spreads fall in [0, 70]; larger numbers are over/under totals
            if 0 <= val <= 70:
                return "line"
            if val > 70:
                return "ou"
            return "NL"

        if r["ml_tm"] == "NL":
            tm_ml = None
        else:
            tm_ml = r["ml_tm"]
        if r["ml_opp"] == "NL":
            opp_ml = None
        else:
            opp_ml = r["ml_opp"]

        types = (
            (_get_val_type(r["open_tm"]), _get_val_type(r["open_opp"])),
            (_get_val_type(r["close_tm"]), _get_val_type(r["close_opp"])),
        )

        # The OU total is listed on the favorite's side; each tuple is
        # (tm, opp), first at open, then at close
        match types[0]:
            case ("ou", "line"):
                tm_openline = r["open_opp"]
                game_openou = r["open_tm"]
            case ("ou", "NL"):
                tm_openline = None
                game_openou = r["open_tm"]
            case ("NL", "line"):
                tm_openline = r["open_opp"]
                game_openou = None
            case ("line", "ou"):
                tm_openline = -r["open_tm"]
                game_openou = r["open_opp"]
            case ("NL", "ou"):
                tm_openline = None
                game_openou = r["open_opp"]
            case ("line", "NL"):
                tm_openline = -r["open_tm"]
                game_openou = None
            case ("NL", "NL"):
                tm_openline = None
                game_openou = None
            case ("line", "line"):
                tm_openline = None
                game_openou = None
            case _:
                print(r)
                raise ValueError(types[0])

        match types[1]:
            case ("ou", "line"):
                tm_closeline = r["close_opp"]
                game_closeou = r["close_tm"]
            case ("ou", "NL"):
                tm_closeline = None
                game_closeou = r["close_tm"]
            case ("NL", "line"):
                tm_closeline = r["close_opp"]
                game_closeou = None
            case ("line", "ou"):
                tm_closeline = -r["close_tm"]
                game_closeou = r["close_opp"]
            case ("NL", "ou"):
                tm_closeline = None
                game_closeou = r["close_opp"]
            case ("line", "NL"):
                tm_closeline = -r["close_tm"]
                game_closeou = None
            case ("NL", "NL"):
                tm_closeline = None
                game_closeou = None
            case ("ou", "ou"):
                tm_closeline = None
                game_closeou = None
            case ("line", "line"):
                tm_closeline = None
                game_closeou = None

            case _:
                print(r)
                raise ValueError(types[1])

        # assert max(np.abs(tm_openline), np.abs(tm_closeline)) < min(
        #     game_closeou, game_openou
        # )
        return (tm_openline, tm_closeline, game_openou, game_closeou, tm_ml, opp_ml)

    df[
        [
            "tm_openline",
            "tm_closeline",
            "game_openou",
            "game_closeou",
            "tm_ml",
            "opp_ml",
        ]
    ] = df.apply(lambda x: get_line(x), axis=1).to_list()

    for col in [
        "tm_openline",
        "tm_closeline",
        "game_openou",
        "game_closeou",
        "tm_ml",
        "opp_ml",
    ]:
        df[col] = df[col].astype(float, errors="ignore")

    # Bring together to games
    games = readSql(f"select * from games where season = {season}")
    df = pd.merge(
        df,
        games[["date", "tm_teamid", "opp_teamid", "season", "game_key"]],
        how="left",
        on=["date", "tm_teamid", "opp_teamid"],
    )

    preLen = len(df)
    df = df.loc[~pd.isna(df["game_key"])].reset_index(drop=True)
    log.debug(
        f"Dropped {preLen - len(df)} records due to no matching game {(preLen - len(df)) / preLen:.2%}"
    )

    # Duplicate for database
    dup = df.copy()
    dup.rename(
        columns={
            "tm_ml": "opp_ml",
            "opp_ml": "tm_ml",
            "tm_teamid": "opp_teamid",
            "opp_teamid": "tm_teamid",
        },
        inplace=True,
    )
    dup["tm_openline"] *= -1
    dup["tm_closeline"] *= -1
    df = pd.concat([df, dup], ignore_index=True)
    df = df.sort_values(by=["date", "game_key", "tm_teamid"]).reset_index(drop=True)

    df = df[
        [
            "season",
            "date",
            "game_key",
            "tm_teamid",
            "opp_teamid",
            "tm_openline",
            "tm_closeline",
            "tm_ml",
            "opp_ml",
            "game_openou",
            "game_closeou",
        ]
    ]
    return df
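single_season_vegas_load returns a frame rather than writing it. A plausible loading loop, assumed by analogy with the teamdates driver in Example #6 (the vegas_lines table it would populate is queried in Example #1):

executeSql("drop table if exists vegas_lines")
for season in readSql("select distinct season from games")["season"]:
    log.info(f"Loading vegas lines for {season}")
    df = single_season_vegas_load(season)
    dfToTable(df, table="vegas_lines", db="ncaa.db", ifExists="append")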
Example #5
def get_teamdates(season: int):
    # Get games for the given season; cross-join every in-season date to every
    # team, then aggregate each team's prior games (gb_sql, assumed to be
    # defined at module level, supplies the sum() aggregates)
    q = f"""
    with seasonDates as (
        select
            season
            ,min(date) as minDate
            ,max(date) as maxDate
            from games
            where season = {season}
    )
    ,dates as (
        select distinct
            s.season
            ,c.date
        from calendar c
        cross join seasonDates s
        where c.date between minDate and maxDate
    )
    ,teamList as (
        select distinct season, tm_teamid
        from games
        where season = {season}
    )
    ,allDatesAllTeams as (
        select
            dates.season
            ,dates.date
            ,teamList.tm_teamid
        from dates
        inner join teamList
            on teamList.season = dates.season
    )
    select 
        adat.*
        ,t.teamname
        ,{gb_sql}
    from allDatesAllTeams adat
    left join games g
        on adat.season = g.season
        and adat.tm_teamid = g.tm_teamid
        and adat.date > g.date
    left join teams t
        on adat.tm_teamid = t.teamid
    group by adat.season, adat.date, adat.tm_teamid
    order by adat.season, adat.tm_teamid, adat.date
    """
    df = readSql(q, db="ncaa.db")

    # Get all games for filtering
    q = f"""
    select 
        t.teamname
        ,g.*
    from games g
    left join teams t
    on g.tm_teamid = t.teamid
    where season = {season}
    """
    games = readSql(q)

    log.info("Executed sql")

    # Get predate opponent stats
    total_rows = len(df)

    # Function to get sum of opponent stats prior to the given row's date
    def opp_stats(row: pd.Series):
        "Removes the team's games from the data, and duplicates by number of games v opp"

        if (row.name + 1) % 10000 == 0:
            log.info(f"Progress in apply: {(row.name + 1) / total_rows:.2%}")

        # Get list of all opponents (includes duplicate values)
        opp_list = games.loc[(games["date"] < row["date"]) & (
            games["tm_teamid"] == row["tm_teamid"])]["opp_teamid"].to_list()

        # Create DF of opponents-counts combinations, merge into all games by said opps
        opp_df = pd.DataFrame(Counter(opp_list).items(),
                              columns=["tm_teamid", "n"])
        opp_predate_games = games.loc[
            (games["date"] < row["date"])
            & (games["tm_teamid"].isin(opp_list))
            & (games["opp_teamid"] != row["tm_teamid"])].merge(opp_df,
                                                               how="left",
                                                               on="tm_teamid")
        # Duplicate games per number of times they were played, get stats
        opp_predate_stats = opp_predate_games.loc[
            opp_predate_games.index.repeat(
                opp_predate_games["n"])][metric_cols].sum()

        return opp_predate_stats

    df[[f"opp_{x}"
        for x in metric_cols]] = df.apply(lambda row: opp_stats(row), axis=1)

    # Add output metrics to df
    # Pre-create columns to avoid pandas performance warnings on repeated inserts
    output_metric_cols = [
        f"{t}{p}_{m}" for m in OUTPUT_METRIC_DICT.keys()
        for p in ["tm", "opp"] for t in ["", "oa_", "rnk_", "rnk_oa_"]
    ]

    df = pd.concat([df, pd.DataFrame(columns=output_metric_cols)], axis=1)

    output_metrics = []
    for metric, metric_details in OUTPUT_METRIC_DICT.items():
        log.debug(metric)
        for prefix in ["tm", "opp"]:
            output_metrics += [
                f"{prefix}_{metric}",
                f"oa_{prefix}_{metric}",
                f"rnk_{prefix}_{metric}",
                f"rnk_oa_{prefix}_{metric}",
            ]
            # Rank direction flips for the opponent's side of a metric
            calc_ascending = metric_details["ascending"]
            if prefix == "opp":
                calc_ascending = not calc_ascending

            # Get metric, OA metric
            df[f"{prefix}_{metric}"] = (
                df[f"{prefix}_{metric_details['num_col']}"] /
                df[f"{prefix}_{metric_details['den_col']}"])
            df[f"oa_{prefix}_{metric}"] = df[f"{prefix}_{metric}"] - (
                df[f"opp_{OTHERPREFIXMAP[prefix]}_{metric_details['num_col']}"]
                /
                df[f"opp_{OTHERPREFIXMAP[prefix]}_{metric_details['den_col']}"]
            )

            # Round to 6 decimals
            df[f"{prefix}_{metric}"] = np.round(df[f"{prefix}_{metric}"], 6)
            df[f"oa_{prefix}_{metric}"] = np.round(df[f"oa_{prefix}_{metric}"],
                                                   6)

            # Rank
            df[f"rnk_{prefix}_{metric}"] = df.groupby(
                ["date"])[f"{prefix}_{metric}"].rank(ascending=calc_ascending,
                                                     method="min")
            df[f"rnk_oa_{prefix}_{metric}"] = df.groupby([
                "date"
            ])[f"oa_{prefix}_{metric}"].rank(ascending=calc_ascending,
                                             method="min")

    # Trim to final columns
    out_first_cols = [
        "teamid",
        "game",
        "win",
        "loss",
        "poss",
        "mins",
    ]
    renames = {}
    for col in out_first_cols:
        renames[f"tm_{col}"] = col
    df.rename(columns=renames, inplace=True)

    out = df[["season", "date"] + out_first_cols + output_metric_cols]

    return out
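get_teamdates references gb_sql inside its query and metric_cols inside opp_stats, neither of which is shown. A plausible construction, assuming metric_cols lists the raw per-game columns to accumulate (the exact list is hypothetical, but it must cover every num_col/den_col in OUTPUT_METRIC_DICT plus the game/win/loss/poss/mins columns used at the end):

# Assumed module-level definitions (not shown in the excerpt)
metric_cols = [
    f"{prefix}_{col}"
    for prefix in ["tm", "opp"]
    for col in ["pts", "poss", "game", "win", "loss", "mins", "or", "dr", "availor"]
]

# The sum() aggregates injected into the teamdates query, aliased to the
# same names so downstream code can address them directly
gb_sql = "\n        ,".join(f"sum(g.{c}) as {c}" for c in metric_cols)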
Example #6
# %%
if __name__ == "__main__":

    # Drop old table
    q = "drop table if exists teamdates"
    executeSql(q, "ncaa.db")

    # No explicit create needed; dfToTable creates the table on first append

    # Loop through seasons in games
    for season in readSql("select distinct season from games")["season"]:
        log.info(f"{season} season")
        df = get_teamdates(season=season)

        dfToTable(
            df,
            table="teamdates",
            db="ncaa.db",
            ifExists="append",
            indexCols=["season", "date", "teamid"],
        )

    for p in get_unique_permutations(["season", "date", "teamid"]):
        executeSql(
            f"CREATE INDEX teamdates_{'_'.join(p)} on teamdates ({', '.join(p)})"
        )
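readSql, executeSql, and dfToTable come from a helpers module (Example #7 imports readSql from helpers). A minimal sketch of what they might wrap, assuming a sqlite3 backend; the real signatures could differ:

import sqlite3

import pandas as pd


def readSql(q: str, db: str = "ncaa.db") -> pd.DataFrame:
    # Run a query and return the result as a DataFrame
    with sqlite3.connect(db) as con:
        return pd.read_sql_query(q, con)


def executeSql(q: str, db: str = "ncaa.db") -> None:
    # Execute a single statement (DDL or DML), committing on success
    with sqlite3.connect(db) as con:
        con.execute(q)


def dfToTable(df, table, db="ncaa.db", ifExists="append", indexCols=None):
    # Append a DataFrame to a table, creating the table if needed; indexCols
    # is accepted for parity with the call sites, though index creation is
    # handled by explicit CREATE INDEX statements in these excerpts
    with sqlite3.connect(db) as con:
        df.to_sql(table, con, if_exists=ifExists, index=False)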
Example #7
    all_years = sorted(td["season"].unique().tolist())
    for test_year in all_years:
        print(f"Testing {test_year} season")

        # Train-test split
        train_mask = td["season"] != test_year
        test_mask = td["season"] == test_year
        X_train = x_scale_pca[train_mask]
        X_test = x_scale_pca[test_mask]
        X_train_incl_line = x_scale_pca_incl_line[train_mask]
        X_test_incl_line = x_scale_pca_incl_line[test_mask]

        for id, model in reg_models.items():
            already_done = (
                readSql("Select distinct season, id from predictions")
                .to_numpy()
                .tolist()
            )
            if [test_year, id] in already_done:
                print(f"Skipping {id} for {test_year}")
                continue
            # Set up output
            td_test = td.loc[test_mask][[x for x in td.columns if x not in X_cols]]
            td_test["id"] = id
            td_test["config"] = str(model.get_params())

            # Get margin prediction
            y = td["tm_margin"]
            y_train = y[train_mask]
            y_test = y[test_mask]
            model.fit(X_train, y_train)
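reg_models is not defined in this excerpt. Each entry maps a model id to an unfitted estimator exposing get_params(), and the analysis script below filters ids whose third character is 'r'; a hypothetical shape consistent with both:

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge

# Hypothetical id convention: the third character flags the model family
# ('r' = regression), matching the substr(id, 3, 1) = 'r' filter below
reg_models = {
    "01r_ridge": Ridge(alpha=1.0),
    "02r_rf": RandomForestRegressor(n_estimators=100),
}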
# %% Imports
import numpy as np
from helpers import readSql
import plotly.express as px


# %% Get and visualize regression models
q = """
select *
from predictions p
where substr(id, 3, 1) = 'r'
"""
regs = readSql(q)

# %% Select a config
q = "select distinct config from predictions"
cfg = readSql(q)["config"].to_list()
print("Please select a config option...")
for idx, val in enumerate(cfg):
    print(f"{idx}   |  {val}")
selected = int(input("Enter a num"))
sel_cfg = cfg[selected]

# %%
q = f"""
select 
    p.*
    ,v.tm_closeline
    ,v.tm_ml
    ,v.opp_ml
from predictions p