Ejemplo n.º 1
0
    def load(self) -> None:
        """Write the replacement-level and league-offense frames out.

        Each frame goes either to the database (appended to its table via
        ``utils.db_load_data``) when ``self.load_db`` is truthy, or to a CSV
        file named ``<table>.csv`` inside ``self.CSV_DIR`` otherwise. Table
        names are suffixed with ``self.split``. The index is written in
        both cases.
        """
        # (attribute holding the frame, table-name template), in write order.
        # Frames are looked up lazily so attribute access happens at the same
        # point as it would with two hand-written stanzas.
        outputs = (
            ("replacement_totals", "replacement_level_{}"),
            ("totals", "league_offense_{}"),
        )

        for frame_attr, name_template in outputs:
            target = name_template.format(self.split)
            if self.load_db:
                logging.info("Loading data into database")
                utils.db_load_data(
                    getattr(self, frame_attr),
                    target,
                    self.conn,
                    if_exists="append",
                    index=True,
                )
            else:
                logging.info("Dumping to csv")
                csv_path = os.path.join(self.CSV_DIR, "{}.csv".format(target))
                getattr(self, frame_attr).to_csv(csv_path, index=True)
Ejemplo n.º 2
0
    def load(self) -> None:
        """Write ``self.data`` to the game-log table or to a CSV file.

        The destination is ``game_log``, or ``game_log_inseason`` when
        ``self.inseason`` is truthy. With ``self.load_db`` set the rows are
        appended to that table via ``utils.db_load_data``; otherwise they are
        written to ``<table>.csv`` inside ``self.CSV_DIR``. The index is not
        written in either case.
        """
        suffix = "_inseason" if self.inseason else ""
        table = "game_log" + suffix

        if not self.load_db:
            # CSV path: file is named after the destination table.
            logging.info("Dumping to csv")
            csv_path = os.path.join(self.CSV_DIR, table + ".csv")
            self.data.to_csv(csv_path, index=False)
            return

        logging.info("Loading data into database")
        utils.db_load_data(
            self.data,
            table,
            self.conn,
            if_exists="append",
            index=False,
        )
Ejemplo n.º 3
0
 def load(self) -> None:
     """Write ``self.totals`` to the league-pitching table or to a CSV file.

     The table name is ``league_pitching_<split>``. With ``self.load_db``
     set the frame is appended to that table via ``utils.db_load_data``;
     otherwise it is written to ``<table>.csv`` inside ``self.CSV_DIR``.
     The index is written in both cases.
     """
     target = "league_pitching_{}".format(self.split)

     if not self.load_db:
         logging.info("Dumping to csv")
         csv_path = os.path.join(self.CSV_DIR, "{}.csv".format(target))
         self.totals.to_csv(csv_path, index=True)
         return

     logging.info("Loading data into database")
     utils.db_load_data(
         self.totals,
         target,
         self.conn,
         if_exists="append",
         index=True,
     )
Ejemplo n.º 4
0
def cli(load: bool, clear: bool, season: Optional[int], dir: str) -> None:
    """Script entry point"""
    # NOTE(review): the docstring is kept verbatim because, if this function
    # is registered as a CLI command, it may be surfaced as help text.
    #
    # Reads the raw batter and pitcher tables, combines them into one list of
    # (lname, fname, team, season) rows, applies name corrections, generates
    # IDs, and writes the result out.
    #
    #   load:   when truthy, append the result to the ``player_id`` table;
    #           otherwise write ``player_id.csv`` into ``dir``.
    #   clear:  only consulted when ``load`` is truthy; deletes all rows from
    #           ``player_id`` before appending.
    #   season: when truthy, keep only rows whose ``season`` equals it.
    #   dir:    directory that receives ``player_id.csv`` in CSV mode.

    config = utils.init_config()
    utils.init_logging(config["LOGGING"])
    conn = utils.connect_db(config["DB"])

    # Everything that uses the connection runs inside try/finally so the
    # connection is closed even when a read, merge or write raises.
    try:
        batters = pd.read_sql_table("raw_batters_overall", conn)
        pitchers = pd.read_sql_table("raw_pitchers_overall", conn)
        corrections = pd.read_sql_table("name_corrections", conn)
        duplicates = get_duplicates(conn)

        batters = CleanFunctions.normalize_names(batters)
        pitchers = CleanFunctions.normalize_names(pitchers)

        batters = batters[["lname", "fname", "team", "season"]]
        pitchers = pitchers[["lname", "fname", "team", "season"]]

        # All batters and pitchers
        # (remove duplicates where a player batted and pitched in the same
        # season)
        data = pd.merge(batters,
                        pitchers,
                        on=["fname", "lname", "team", "season"],
                        how="outer")
        data = data.sort_values(by=["lname", "fname", "team", "season"])

        data = CleanFunctions.apply_corrections(data, corrections)
        data = generate_ids(data, duplicates)

        # The season filter is applied after generate_ids has run on the
        # full, unfiltered data.
        if season:
            data = data[data["season"] == season]

        if load:
            if clear:
                print("Clearing database table")
                conn.execute("DELETE FROM player_id")

            print("Loading data into database")
            utils.db_load_data(data,
                               "player_id",
                               conn,
                               if_exists="append",
                               index=False)

        else:
            print("Dumping to csv")
            data.to_csv(os.path.join(dir, "player_id.csv"), index=False)
    finally:
        conn.close()