def test_init_scrapers():
    """Check that every concrete scraper can be built and is a BaseScraper."""
    # Scrapers that all take the plain "overall" Split.
    overall_scraper_classes = (
        IndividualOffenseScraper,
        IndividualPitchingScraper,
        TeamFieldingScraper,
        TeamOffenseScraper,
        TeamPitchingScraper,
    )

    # The game log scraper is the odd one out: it takes a GameLogSplit.
    built = [GameLogScraper("2018", GameLogSplit("hitting"), "csv")]
    built.extend(
        scraper_cls("2018", Split("overall"), "csv")
        for scraper_cls in overall_scraper_classes
    )

    assert all(isinstance(instance, BaseScraper) for instance in built)
def test_init_etls():
    """Check that every ETL class can be constructed and yields a truthy object."""
    # ETLs that share the (year, split, load, conn) constructor signature.
    split_etl_classes = (
        IndividualOffenseETL,
        IndividualPitchingETL,
        TeamOffenseETL,
        TeamPitchingETL,
        LeagueOffenseETL,
        LeaguePitchingETL,
    )

    # GameLogETL takes no split argument, so it is built separately.
    built = [GameLogETL(2018, True, None)]
    built.extend(
        etl_cls(2018, Split("overall"), True, None) for etl_cls in split_etl_classes
    )

    for instance in built:
        assert instance
def inseason(stat: Tuple[int], split: str, output: str, verbose: bool) -> None:
    """Run scrapers for the inseason subcommand

    The target year is derived from today's date via
    ``utils.season_to_year``.

    :param stat: Stat identifiers; converted to a list and handed to
        ``run_scrapers``
    :param split: Name of a single ``Split`` member, or ``"all"`` to run
        every member of ``Split``
    :param output: Output option forwarded unchanged to ``run_scrapers``
    :param verbose: Verbosity flag forwarded to ``run_scrapers``
    """
    logging.info("Initializing scraping controller script")

    current_season = date.today().year
    target_year = utils.season_to_year(current_season)

    # "all" expands to every Split member; anything else must name one member.
    selected_splits = list(Split) if split == "all" else [Split(split)]

    run_scrapers(
        list(stat),
        target_year,
        selected_splits,
        output,
        inseason=True,
        verbose=verbose,
    )

    logging.info("Scraping completed")
def final(year: List[int], stat: Tuple[int], split: str, output: str, verbose: bool) -> None:
    """Scrape end of the year final stats

    :param year: Seasons to scrape; each is passed through
        ``utils.season_to_year`` before scraping starts
    :param stat: Stat identifiers; converted to a list and handed to
        ``run_scrapers``
    :param split: Name of a single ``Split`` member, or ``"all"`` to run
        every member of ``Split``
    :param output: Output option forwarded unchanged to ``run_scrapers``
    :param verbose: Verbosity flag forwarded to ``run_scrapers``
    """
    logging.info("Initializing scraping controller script")

    # Convert every season up front so a bad value fails before any scraping.
    target_years = [utils.season_to_year(season) for season in year]

    # "all" expands to every Split member; anything else must name one member.
    selected_splits = list(Split) if split == "all" else [Split(split)]

    for target_year in target_years:
        print("\nScraping:", target_year, "\n")
        run_scrapers(
            list(stat),
            target_year,
            selected_splits,
            output,
            inseason=False,
            verbose=verbose,
        )

    logging.info("Scraping completed")
def final(
    year: List[int], stat: Tuple[int], split: str, load: bool, verbose: bool
) -> None:
    """Run ETLs for the final subcommand

    Loads the configuration, sets up logging, opens a database connection
    and runs the requested ETLs once per year. The connection is always
    closed, even when an ETL (or split parsing) raises.

    :param year: Years to process; each is passed as-is to ``run_etls``
    :param stat: Stat identifiers; converted to a list and handed to
        ``run_etls``
    :param split: Name of a single ``Split`` member, or ``"all"`` to run
        every member of ``Split``
    :param load: Load flag forwarded unchanged to ``run_etls``
    :param verbose: Accepted for interface compatibility; not used by this
        function
    """
    config = utils.init_config()
    utils.init_logging(config["LOGGING"])
    logging.info("Initializing cleaning controller script")

    conn = utils.connect_db(config["DB"])
    try:
        # "all" expands to every Split member; anything else must name one.
        if split == "all":
            splits = list(Split)
        else:
            splits = [Split(split)]

        for year_ in year:
            logging.info("Running ETLs for %s", year_)
            run_etls(list(stat), year_, splits, load, conn)
    finally:
        # Release the connection on failure too, so it is never leaked.
        conn.close()

    logging.info("Cleaning completed")
def test_clean_overall(self):
    """Verify ``_clean`` on raw overall pitching data for team "AUR".

    The fixture expects: renamed/reordered columns, added ``team`` and
    ``season`` columns, "-" placeholders turned into "0", and the trailing
    period dropped from the class year.
    """
    input_columns = [
        "No.", "Name", "ERA", "W", "L", "APP", "GS", "CG", "SHO", "SV",
        "IP", "H", "R", "ER", "BB", "SO", "2B", "3B", "HR", "AB",
        "B/AVG", "WP", "HBP", "BK", "SFA", "SHA", "Yr", "Pos",
        "app", "gs", "w", "l", "sv", "cg", "ip", "h", "r", "er", "bb", "k",
        "k/9", "hr", "era",
    ]
    mathey_raw = (
        "31", "Nicholas Mathey", "2.28", "2", "0", "20", "0", "-", "-", "12",
        "43.1", "31", "15", "11", "8", "47", "2", "-", "2", "153",
        ".203", "5", "1", "-", "1", "2", "Sr.", "OF/P",
        "20", "0", "2", "0", "12", "-", "43.1", "31", "15", "11", "8", "47",
        "9.76", "2", "2.28",
    )
    fidler_raw = (
        "32", "Michael Fidler", "2.53", "1", "1", "13", "0", "-", "-", "4",
        "21.1", "16", "6", "6", "9", "17", "4", "-", "1", "72",
        ".222", "5", "7", "-", "2", "2", "Sr.", "P/IF",
        "13", "0", "1", "1", "4", "-", "21.1", "16", "6", "6", "9", "17",
        "7.17", "1", "2.53",
    )
    raw_frame = pd.DataFrame([mathey_raw, fidler_raw], columns=input_columns)

    output_columns = [
        "no", "name", "team", "season", "yr", "pos",
        "g", "gs", "w", "l", "sv", "cg", "sho",
        "ip", "h", "r", "er", "bb", "so", "era",
        "x2b", "x3b", "hr", "ab", "avg",
        "wp", "hbp", "bk", "sf", "sh", "so_9",
    ]
    mathey_clean = (
        "31", "Nicholas Mathey", "AUR", "2019", "Sr", "OF/P",
        "20", "0", "2", "0", "12", "0", "0",
        "43.1", "31", "15", "11", "8", "47", "2.28",
        "2", "0", "2", "153", ".203",
        "5", "1", "0", "1", "2", "9.76",
    )
    fidler_clean = (
        "32", "Michael Fidler", "AUR", "2019", "Sr", "P/IF",
        "13", "0", "1", "1", "4", "0", "0",
        "21.1", "16", "6", "6", "9", "17", "2.53",
        "4", "0", "1", "72", ".222",
        "5", "7", "0", "2", "2", "7.17",
    )
    expected = pd.DataFrame([mathey_clean, fidler_clean], columns=output_columns)

    scraper = IndividualPitchingScraper("2018-19", Split("overall"), "csv")
    actual = scraper._clean(raw_frame, "AUR")

    assert expected.equals(actual)
def test_info(self):
    """Smoke test: ``info()`` runs on a pitching scraper without raising."""
    IndividualPitchingScraper("2018-19", Split("overall"), "csv").info()
def test_clean(self):
    """Verify ``_clean`` on raw overall offense data for team "BEN".

    The fixture expects: renamed/reordered columns, added ``team`` and
    ``season`` columns, "-" placeholders turned into "0", the trailing
    period dropped from the class year, and an empty position becoming NaN.
    """
    input_columns = [
        "No.", "Name", "Yr", "Pos",
        "g", "ab", "r", "h", "2b", "3b", "hr", "rbi", "bb", "k", "sb", "cs",
        "avg", "obp", "slg",
        "hbp", "sf", "sh", "tb", "xbh", "hdp", "go", "fo", "go/fo", "pa",
    ]
    hodo_raw = (
        "3", "Jonathan Hodo", "So.", "INF",
        "41", "161", "23", "48", "7", "1", "-", "26", "12", "24", "11", "3",
        ".298", ".370", ".354",
        "7", "1", "-", "57", "8", "3", "40", "45", "0.89", "181",
    )
    surin_raw = (
        "9", "Jack Surin", "Fr.", "",
        "41", "150", "18", "39", "10", "-", "-", "22", "8", "16", "7", "-",
        ".260", ".307", ".327",
        "3", "2", "2", "49", "10", "2", "39", "55", "0.71", "165",
    )
    raw_frame = pd.DataFrame([hodo_raw, surin_raw], columns=input_columns)

    output_columns = [
        "no", "name", "team", "season", "yr", "pos",
        "g", "pa", "ab", "r", "h", "x2b", "x3b", "hr", "rbi",
        "bb", "so", "sb", "cs",
        "avg", "obp", "slg",
        "hbp", "sf", "sh", "tb", "xbh", "gdp", "go", "fo", "go_fo",
    ]
    hodo_clean = (
        "3", "Jonathan Hodo", "BEN", "2019", "So", "INF",
        "41", "181", "161", "23", "48", "7", "1", "0", "26",
        "12", "24", "11", "3",
        ".298", ".370", ".354",
        "7", "1", "0", "57", "8", "3", "40", "45", "0.89",
    )
    surin_clean = (
        "9", "Jack Surin", "BEN", "2019", "Fr", np.nan,
        "41", "165", "150", "18", "39", "10", "0", "0", "22",
        "8", "16", "7", "0",
        ".260", ".307", ".327",
        "3", "2", "2", "49", "10", "2", "39", "55", "0.71",
    )
    expected = pd.DataFrame([hodo_clean, surin_clean], columns=output_columns)

    scraper = IndividualOffenseScraper("2018-19", Split("overall"), "csv")
    actual = scraper._clean(raw_frame, "BEN")

    assert expected.equals(actual)
def test_cant_instantiate_base_scraper():
    """Constructing ``BaseScraper`` directly must raise ``TypeError``.

    NOTE(review): presumably because BaseScraper is abstract -- confirm
    against its definition.
    """
    with pytest.raises(TypeError):
        overall_split = Split("overall")
        BaseScraper("2018", overall_split, "csv")