Beispiel #1
0
def test_init_scrapers():
    """Construct each concrete scraper and check it is a ``BaseScraper``."""
    # (scraper class, split class, split value) for every scraper under test.
    specs = (
        (GameLogScraper, GameLogSplit, "hitting"),
        (IndividualOffenseScraper, Split, "overall"),
        (IndividualPitchingScraper, Split, "overall"),
        (TeamFieldingScraper, Split, "overall"),
        (TeamOffenseScraper, Split, "overall"),
        (TeamPitchingScraper, Split, "overall"),
    )
    built = [
        scraper_cls("2018", split_cls(split_value), "csv")
        for scraper_cls, split_cls, split_value in specs
    ]
    for instance in built:
        assert isinstance(instance, BaseScraper)
Beispiel #2
0
def test_init_etls():
    """Construct each ETL with year 2018 and check the instance is truthy."""
    season, load_flag, connection = 2018, True, None

    # GameLogETL takes no split argument, so it is built on its own.
    built = [GameLogETL(season, load_flag, connection)]

    split_etl_classes = (
        IndividualOffenseETL,
        IndividualPitchingETL,
        TeamOffenseETL,
        TeamPitchingETL,
        LeagueOffenseETL,
        LeaguePitchingETL,
    )
    for etl_cls in split_etl_classes:
        built.append(etl_cls(season, Split("overall"), load_flag, connection))

    for instance in built:
        assert instance
Beispiel #3
0
def inseason(stat: Tuple[int], split: str, output: str, verbose: bool) -> None:
    """Run scrapers for the inseason subcommand.

    The year is derived from today's date via ``utils.season_to_year``.

    :param stat: Stats to scrape; passed to ``run_scrapers`` as a list
    :param split: ``"all"`` selects ``list(Split)``, any other value is
        converted with ``Split(split)``
    :param output: Forwarded unchanged to ``run_scrapers``
    :param verbose: Forwarded to ``run_scrapers`` as ``verbose``
    """
    logging.info("Initializing scraping controller script")
    current_year = utils.season_to_year(date.today().year)

    selected_splits = list(Split) if split == "all" else [Split(split)]

    run_scrapers(
        list(stat),
        current_year,
        selected_splits,
        output,
        inseason=True,
        verbose=verbose,
    )
    logging.info("Scraping completed")
Beispiel #4
0
def final(year: List[int], stat: Tuple[int], split: str, output: str,
          verbose: bool) -> None:
    """Scrape end of the year final stats.

    Each entry of ``year`` is converted with ``utils.season_to_year`` and
    scraped in order.

    :param year: Seasons to scrape
    :param stat: Stats to scrape; passed to ``run_scrapers`` as a fresh list
        for every year
    :param split: ``"all"`` selects ``list(Split)``, any other value is
        converted with ``Split(split)``
    :param output: Forwarded unchanged to ``run_scrapers``
    :param verbose: Forwarded to ``run_scrapers`` as ``verbose``
    """
    logging.info("Initializing scraping controller script")
    converted_years = [utils.season_to_year(season) for season in year]

    selected_splits = list(Split) if split == "all" else [Split(split)]

    for current_year in converted_years:
        print("\nScraping:", current_year, "\n")
        run_scrapers(
            list(stat),
            current_year,
            selected_splits,
            output,
            inseason=False,
            verbose=verbose,
        )
    logging.info("Scraping completed")
Beispiel #5
0
def final(
    year: List[int], stat: Tuple[int], split: str, load: bool, verbose: bool
) -> None:
    """Run ETLs for the final subcommand.

    Loads the config, initializes logging, opens a database connection and
    calls ``run_etls`` once per entry in ``year``. The connection is closed
    even if split parsing or an ETL run raises.

    :param year: Years to process, in order
    :param stat: Stats to process; passed to ``run_etls`` as a fresh list
        for every year
    :param split: ``"all"`` selects ``list(Split)``, any other value is
        converted with ``Split(split)``
    :param load: Forwarded unchanged to ``run_etls``
    :param verbose: Not read in this function body
    """
    config = utils.init_config()
    utils.init_logging(config["LOGGING"])
    logging.info("Initializing cleaning controller script")
    conn = utils.connect_db(config["DB"])

    # Everything after the connection is opened runs under try/finally so the
    # connection is released on failure as well as on success.
    try:
        if split == "all":
            splits = list(Split)
        else:
            splits = [Split(split)]

        for year_ in year:
            logging.info("Running ETLs for %s", year_)
            run_etls(list(stat), year_, splits, load, conn)
    finally:
        conn.close()

    # Only reached when every ETL run finished without raising.
    logging.info("Cleaning completed")
Beispiel #6
0
 def test_clean_overall(self):
     """Check ``_clean`` on a two-row raw overall pitching table for team "AUR".

     The raw frame carries both upper-case and lower-case stat columns; the
     expected frame has a single set of lower-case columns plus ``team`` and
     ``season``.
     """
     pitching_scraper = IndividualPitchingScraper(
         "2018-19", Split("overall"), "csv"
     )

     # Raw input: header row followed by one tuple per player.
     raw_header = [
         "No.", "Name", "ERA", "W", "L", "APP", "GS", "CG", "SHO", "SV",
         "IP", "H", "R", "ER", "BB", "SO", "2B", "3B", "HR", "AB",
         "B/AVG", "WP", "HBP", "BK", "SFA", "SHA", "Yr", "Pos",
         "app", "gs", "w", "l", "sv", "cg", "ip", "h", "r", "er", "bb", "k",
         "k/9", "hr", "era",
     ]
     raw_mathey = (
         "31", "Nicholas Mathey", "2.28", "2", "0", "20", "0", "-", "-", "12",
         "43.1", "31", "15", "11", "8", "47", "2", "-", "2", "153",
         ".203", "5", "1", "-", "1", "2", "Sr.", "OF/P",
         "20", "0", "2", "0", "12", "-", "43.1", "31", "15", "11", "8", "47",
         "9.76", "2", "2.28",
     )
     raw_fidler = (
         "32", "Michael Fidler", "2.53", "1", "1", "13", "0", "-", "-", "4",
         "21.1", "16", "6", "6", "9", "17", "4", "-", "1", "72",
         ".222", "5", "7", "-", "2", "2", "Sr.", "P/IF",
         "13", "0", "1", "1", "4", "-", "21.1", "16", "6", "6", "9", "17",
         "7.17", "1", "2.53",
     )
     raw_df = pd.DataFrame([raw_mathey, raw_fidler], columns=raw_header)

     # Expected output: "-" placeholders appear as "0" and "Sr." as "Sr".
     clean_header = [
         "no", "name", "team", "season", "yr", "pos",
         "g", "gs", "w", "l", "sv", "cg", "sho",
         "ip", "h", "r", "er", "bb", "so", "era",
         "x2b", "x3b", "hr", "ab", "avg", "wp", "hbp", "bk", "sf", "sh",
         "so_9",
     ]
     clean_mathey = (
         "31", "Nicholas Mathey", "AUR", "2019", "Sr", "OF/P",
         "20", "0", "2", "0", "12", "0", "0",
         "43.1", "31", "15", "11", "8", "47", "2.28",
         "2", "0", "2", "153", ".203", "5", "1", "0", "1", "2",
         "9.76",
     )
     clean_fidler = (
         "32", "Michael Fidler", "AUR", "2019", "Sr", "P/IF",
         "13", "0", "1", "1", "4", "0", "0",
         "21.1", "16", "6", "6", "9", "17", "2.53",
         "4", "0", "1", "72", ".222", "5", "7", "0", "2", "2",
         "7.17",
     )
     expected = pd.DataFrame(
         [clean_mathey, clean_fidler], columns=clean_header
     )

     assert expected.equals(pitching_scraper._clean(raw_df, "AUR"))
Beispiel #7
0
 def test_info(self):
     """Smoke test: ``info()`` on an overall pitching scraper does not raise."""
     IndividualPitchingScraper("2018-19", Split("overall"), "csv").info()
Beispiel #8
0
 def test_clean(self):
     """Check ``_clean`` on a two-row raw overall offense table for team "BEN".

     The second player has an empty position string in the raw data and
     ``np.nan`` in the expected frame.
     """
     offense_scraper = IndividualOffenseScraper(
         "2018-19", Split("overall"), "csv"
     )

     # Raw input: header row followed by one tuple per player.
     raw_header = [
         "No.", "Name", "Yr", "Pos",
         "g", "ab", "r", "h", "2b", "3b", "hr", "rbi", "bb", "k", "sb", "cs",
         "avg", "obp", "slg",
         "hbp", "sf", "sh", "tb", "xbh", "hdp", "go", "fo", "go/fo", "pa",
     ]
     raw_hodo = (
         "3", "Jonathan  Hodo", "So.", "INF",
         "41", "161", "23", "48", "7", "1", "-", "26", "12", "24", "11", "3",
         ".298", ".370", ".354",
         "7", "1", "-", "57", "8", "3", "40", "45", "0.89", "181",
     )
     raw_surin = (
         "9", "Jack  Surin", "Fr.", "",
         "41", "150", "18", "39", "10", "-", "-", "22", "8", "16", "7", "-",
         ".260", ".307", ".327",
         "3", "2", "2", "49", "10", "2", "39", "55", "0.71", "165",
     )
     raw_df = pd.DataFrame([raw_hodo, raw_surin], columns=raw_header)

     # Expected output: "-" placeholders appear as "0" and the trailing "."
     # is gone from the class year.
     clean_header = [
         "no", "name", "team", "season", "yr", "pos",
         "g", "pa", "ab", "r", "h", "x2b", "x3b", "hr", "rbi", "bb", "so",
         "sb", "cs",
         "avg", "obp", "slg",
         "hbp", "sf", "sh", "tb", "xbh", "gdp", "go", "fo", "go_fo",
     ]
     clean_hodo = (
         "3", "Jonathan  Hodo", "BEN", "2019", "So", "INF",
         "41", "181", "161", "23", "48", "7", "1", "0", "26", "12", "24",
         "11", "3",
         ".298", ".370", ".354",
         "7", "1", "0", "57", "8", "3", "40", "45", "0.89",
     )
     clean_surin = (
         "9", "Jack  Surin", "BEN", "2019", "Fr", np.nan,
         "41", "165", "150", "18", "39", "10", "0", "0", "22", "8", "16",
         "7", "0",
         ".260", ".307", ".327",
         "3", "2", "2", "49", "10", "2", "39", "55", "0.71",
     )
     expected = pd.DataFrame([clean_hodo, clean_surin], columns=clean_header)

     assert expected.equals(offense_scraper._clean(raw_df, "BEN"))
Beispiel #9
0
def test_cant_instantiate_base_scraper():
    """Constructing ``BaseScraper`` directly must raise ``TypeError``."""
    expected_error = TypeError
    with pytest.raises(expected_error):
        BaseScraper("2018", Split("overall"), "csv")